Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions util/tracer_nvbit/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ tracer_tool/tracer_tool.o
tracer_tool/tracer_tool.so
tracer_tool/inject_funcs.o
tracer_tool/traces-processing/post-traces-processing
traceDsm
traceAsm
8 changes: 5 additions & 3 deletions util/tracer_nvbit/tracer_tool/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,18 @@ current_dir := $(notdir $(patsubst %/,%,$(dir $(mkfile_path))))

NVBIT_TOOL=$(current_dir).so

all: $(NVBIT_TOOL)
all: $(NVBIT_TOOL) traceDsm

$(NVBIT_TOOL): $(OBJECTS) $(NVBIT_PATH)/libnvbit.a
$(NVCC) -arch=$(ARCH) -O3 $(OBJECTS) $(LIBS) $(NVCC_PATH) -lcuda -lcudart_static -shared -o $@

%.o: %.cu common.h
$(NVCC) -dc -c -std=c++11 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -arch=$(ARCH) -O3 -Xcompiler -fPIC $< -o $@
$(NVCC) -dc -c -std=c++11 $(INCLUDES) -Xptxas -cloning=no -Xcompiler -Wall -Xcompiler -Wno-unused-result -arch=$(ARCH) -O3 -Xcompiler -fPIC $< -o $@

inject_funcs.o: inject_funcs.cu common.h
$(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Xptxas -astoolspatch --keep-device-functions -arch=$(ARCH) -Xcompiler -Wall -Xcompiler -fPIC -c $< -o $@
$(NVCC) $(INCLUDES) $(MAXRREGCOUNT_FLAG) -Xptxas -astoolspatch --keep-device-functions -arch=$(ARCH) -Xcompiler -Wall -Xcompiler -Wno-unused-result -Xcompiler -fPIC -c $< -o $@

traceDsm: traceDsm.cpp common.h
$(CXX) -std=c++17 -O3 -g -Wno-unused-result -o $@ $^
clean:
rm -f *.so *.o
215 changes: 202 additions & 13 deletions util/tracer_nvbit/tracer_tool/common.h
Original file line number Diff line number Diff line change
@@ -1,26 +1,51 @@
/* Author1: Mahmoud Khairy, [email protected] - 2019 */
/* Author2: Jason Shen, [email protected] - 2019 */

#include <stdint.h>
#ifndef COMMON_H
#define COMMON_H

static __managed__ uint64_t total_dynamic_instr_counter = 0;
static __managed__ uint64_t reported_dynamic_instr_counter = 0;
static __managed__ bool stop_report = false;
#include <assert.h>
#include <cstdint>
#include <cstdio>
#include <stdexcept>
#include <stdint.h>
#include <string>

/* information collected in the instrumentation function and passed
* on the channel from the GPU to the CPU */
#define MAX_SRC 5
#define MAX_OPCODE_LENGTH 32

/* Tag naming one of the instruction-record layouts declared in this header.
 * The underlying type is uint8_t, so a tag occupies exactly one byte.
 * NOTE(review): presumably stored alongside each record so a reader knows
 * which layout follows - confirm against the trace writer/reader. */
typedef enum : uint8_t {
// record is a bare sim_inst_trace_t
INST_BASE = 0,
// record is a sim_inst_trace_flat_t
INST_FLAT,
// record is a sim_inst_trace_delta_t
INST_DELTA,
// record is a sim_inst_trace_stride_t
INST_STRIDE,
} inst_type_t;

typedef struct {
int cta_id_x;
int cta_id_y;
int cta_id_z;
int warpid_tb;
int warpid_sm;
int sm_id;
int opcode_id;
uint64_t addrs[32];
uint32_t line_num;
unsigned kernel_id;
unsigned grid_dim_x;
unsigned grid_dim_y;
unsigned grid_dim_z;
unsigned block_dim_x;
unsigned block_dim_y;
unsigned block_dim_z;
unsigned shared_mem_bytes;
unsigned shmem;
unsigned nregs;
unsigned binary_version;
uint64_t cuda_stream_id;
uint64_t shmem_base_addr;
uint64_t local_mem_base_addr;
char nvbit_version[10];
char accelsim_tracer_version[10];
bool enable_lineinfo;
} kernel_header;

typedef struct {
char opcode[MAX_OPCODE_LENGTH];

uint32_t vpc;
bool is_mem;
int32_t GPRDst;
Expand All @@ -30,4 +55,168 @@ typedef struct {
uint32_t active_mask;
uint32_t predicate_mask;
uint64_t imm;
} sim_inst_trace_t;

/* Instruction record carrying an explicit list of 32 64-bit addresses after
 * the common sim_inst_trace_t fields. */
typedef struct {
sim_inst_trace_t base;
// presumably one address per warp lane - TODO confirm against the encoder
uint64_t addrs[32];
} sim_inst_trace_flat_t;

/* Instruction record carrying one 64-bit base address plus 32 signed 32-bit
 * deltas after the common sim_inst_trace_t fields. */
typedef struct {
sim_inst_trace_t base;

uint64_t base_addr;
// NOTE(review): presumably per-lane offsets relative to base_addr; the exact
// reference point (base_addr vs. previous lane) must be confirmed against the
// encoder/decoder.
int32_t delta[32];
} sim_inst_trace_delta_t;

/* Instruction record carrying a 64-bit base address and a single signed
 * 32-bit stride after the common sim_inst_trace_t fields. */
typedef struct {
sim_inst_trace_t base;
uint64_t base_addr;
// NOTE(review): presumably lane i accesses base_addr + i * stride - confirm
// against the encoder/decoder.
int32_t stride;
} sim_inst_trace_stride_t;

/* Record filled in by instrument_inst() in inject_funcs.cu: the common
 * sim_inst_trace_t fields (accessed there as ma.base.*) followed by the
 * location of the executing warp and its memory addresses. */
typedef struct {
sim_inst_trace_t base;

// CTA (thread block) index, taken from get_ctaid() in instrument_inst()
int cta_id_x;
int cta_id_y;
int cta_id_z;
int warpid_tb;
// set from get_warpid() in instrument_inst()
int warpid_sm;
// set from get_smid() in instrument_inst()
int sm_id;
int opcode_id;
uint32_t line_num;
// for memory instructions, entry i is lane i's address gathered with
// __shfl_sync() in instrument_inst()
uint64_t addrs[32];
} inst_trace_t;

/* Union over the four simulator-facing record layouts. Every alternative
 * either is a sim_inst_trace_t or has one as its first member (`base`).
 * As with any union, its size is that of the largest alternative. */
typedef union {
sim_inst_trace_t sim_inst_base;
sim_inst_trace_delta_t sim_inst_delta;
sim_inst_trace_flat_t sim_inst_flat;
sim_inst_trace_stride_t sim_inst_stride;

} sim_inst_u;

// unsigned get_inst_size(inst_type_t type) {
// switch (type) {
// case INST_BASE:
// return sizeof(sim_inst_trace_t);
// case INST_FLAT:
// return sizeof(sim_inst_trace_flat_t);
// case INST_DELTA:
// return sizeof(sim_inst_trace_delta_t);
// case INST_STRIDE:
// return sizeof(sim_inst_trace_stride_t);
// default:
// assert(0);
// exit(1);
// }
// }

/**
 * Tests whether a string terminates with the given suffix.
 * @param fullString String to inspect
 * @param ending Suffix to look for (an empty suffix always matches)
 * @return true if fullString ends with ending, false otherwise
 */
inline bool hasEnding(const std::string &fullString,
                      const std::string &ending) {
  const std::string::size_type suffix_len = ending.length();

  // A string shorter than the suffix can never end with it.
  if (fullString.length() < suffix_len) {
    return false;
  }

  const std::string::size_type tail_start = fullString.length() - suffix_len;
  return fullString.compare(tail_start, suffix_len, ending) == 0;
}

/**
 * Opens a trace file for reading, transparently decompressing .xz input.
 *
 * Paths ending in ".xz" are read through an `xz -dc` pipe created with
 * popen(); paths ending in ".trace" or ".traceg" are opened with fopen() in
 * binary mode. The caller must release the stream with the matching call:
 * pclose() for ".xz" inputs, fclose() otherwise.
 *
 * @param filepath Path to the file to open
 * @return Non-null FILE pointer to the opened file/pipe
 * @throws std::runtime_error if file type is unsupported or opening fails
 */
inline FILE *openFileForReading(const std::string &filepath) {
  // Wrap a path in single quotes for /bin/sh so that spaces and shell
  // metacharacters in it are taken literally; an embedded single quote is
  // emitted as '\'' (close quote, escaped quote, reopen quote).
  const auto shellQuote = [](const std::string &raw) -> std::string {
    std::string quoted = "'";
    for (const char c : raw) {
      if (c == '\'') {
        quoted += "'\\''";
      } else {
        quoted += c;
      }
    }
    quoted += '\'';
    return quoted;
  };

  FILE *file = nullptr;

  if (hasEnding(filepath, ".xz")) {
    // Decompress to stdout through xz; "--" keeps a path that starts with
    // '-' from being parsed as an option.
    const std::string command = "xz -dc -- " + shellQuote(filepath);
    // popen() only fails if the pipe/process cannot be created; an xz error
    // (e.g. missing input file) shows up later as EOF and via pclose().
    file = popen(command.c_str(), "r");
  } else if (hasEnding(filepath, ".trace") || hasEnding(filepath, ".traceg")) {
    // Use fopen for regular trace files
    file = fopen(filepath.c_str(), "rb");
  } else {
    throw std::runtime_error("Unsupported file type: " + filepath);
  }

  if (!file) {
    throw std::runtime_error("Failed to open file for reading: " + filepath);
  }

  return file;
}

/**
 * Opens a file for writing, optionally compressing the output with xz.
 *
 * With compression enabled, ".xz" is appended to filepath and the data is
 * piped through `xz -1 -T0` via popen(); the caller must close that stream
 * with pclose(). Otherwise filepath is opened verbatim with fopen() in binary
 * mode and must be closed with fclose().
 *
 * @param filepath Output path; ".xz" is appended when compressing
 * @param use_xz_compression Whether to compress with xz
 * @return Non-null FILE pointer to the opened file/pipe
 * @throws std::runtime_error if opening fails
 */
inline FILE *openFileForWriting(const std::string &filepath,
                                bool use_xz_compression = false) {
  if (!use_xz_compression) {
    FILE *plain = fopen(filepath.c_str(), "wb");
    if (!plain) {
      throw std::runtime_error("Failed to open file for writing: " + filepath);
    }
    return plain;
  }

  // Wrap a path in single quotes for /bin/sh so that spaces and shell
  // metacharacters in it are taken literally; an embedded single quote is
  // emitted as '\'' (close quote, escaped quote, reopen quote).
  const auto shellQuote = [](const std::string &raw) -> std::string {
    std::string quoted = "'";
    for (const char c : raw) {
      if (c == '\'') {
        quoted += "'\\''";
      } else {
        quoted += c;
      }
    }
    quoted += '\'';
    return quoted;
  };

  const std::string command = "xz -1 -T0 > " + shellQuote(filepath + ".xz");
  // popen() only fails if the pipe/process cannot be created; a failing
  // redirect or xz error is reported later by pclose().
  FILE *pipe = popen(command.c_str(), "w");
  if (!pipe) {
    throw std::runtime_error(
        "Failed to open xz compression pipe for: " + filepath + ".xz");
  }
  return pipe;
}

/**
 * Opens an xz-compressed output pipe, building the shell command in a
 * caller-supplied buffer.
 *
 * The command "xz -1 -T0 > <base_filepath>.xz" is formatted into cmd_buffer
 * and run with popen(); the caller must close the returned stream with
 * pclose(). The path is interpolated as-is, so it must not contain spaces or
 * shell metacharacters.
 *
 * @param base_filepath Base filepath (".xz" is appended)
 * @param cmd_buffer Buffer that receives the xz command
 * @param buffer_size Size of the command buffer in bytes
 * @return Non-null FILE pointer to the opened pipe
 * @throws std::runtime_error if the buffer is missing or too small, if the
 *         command cannot be formatted, or if opening the pipe fails
 */
inline FILE *openFileForWritingXz(const std::string &base_filepath,
                                  char *cmd_buffer, size_t buffer_size) {
  // snprintf() must not be handed a null destination with a non-zero size,
  // and a zero-sized buffer can never hold the command.
  if (cmd_buffer == nullptr || buffer_size == 0) {
    throw std::runtime_error("Command buffer too small for xz compression");
  }

  const int written = snprintf(cmd_buffer, buffer_size, "xz -1 -T0 > %s.xz",
                               base_filepath.c_str());
  // A negative result signals a formatting/encoding error, not truncation.
  if (written < 0) {
    throw std::runtime_error("Failed to format xz compression command");
  }
  // Compare in size_t so buffer sizes above INT_MAX are handled correctly.
  if (static_cast<size_t>(written) >= buffer_size) {
    throw std::runtime_error("Command buffer too small for xz compression");
  }

  // popen() only fails if the pipe/process cannot be created; a failing
  // redirect or xz error is reported later by pclose().
  FILE *file = popen(cmd_buffer, "w");
  if (!file) {
    throw std::runtime_error(
        "Failed to open xz compression pipe for: " + base_filepath + ".xz");
  }

  return file;
}

/**
 * Generates an output filepath from an input filepath and a new extension.
 *
 * A trailing ".xz" on the input is dropped first; the new extension is then
 * appended after a "." separator. Examples:
 *   ("kernel.traceg.xz", "txt") -> "kernel.traceg.txt"
 *   ("kernel.traceg",    "txt") -> "kernel.traceg.txt"
 *
 * @param input_filepath Input file path
 * @param new_extension New extension to append (without dot)
 * @return Output filepath with new extension
 */
inline std::string generateOutputFilepath(const std::string &input_filepath,
                                          const std::string &new_extension) {
  const std::string xz_suffix = ".xz";

  // Strip the compression suffix so both branches build "<stem>.<ext>".
  // Previously the ".xz" branch omitted the "." separator and glued the
  // extension directly onto the stem.
  const std::string stem =
      hasEnding(input_filepath, xz_suffix)
          ? input_filepath.substr(0, input_filepath.length() -
                                         xz_suffix.length())
          : input_filepath;

  return stem + "." + new_extension;
}

#endif
30 changes: 15 additions & 15 deletions util/tracer_nvbit/tracer_tool/inject_funcs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,17 @@ instrument_inst(int pred, int opcode_id, int32_t vpc, bool is_mem,
}
}

inst_trace_t ma;
inst_trace_t ma = {0};

if (is_mem) {
/* collect memory address information */
for (int i = 0; i < 32; i++) {
ma.addrs[i] = __shfl_sync(active_mask, addr, i);
}
ma.width = width;
ma.is_mem = true;
ma.base.width = width;
ma.base.is_mem = true;
} else {
ma.is_mem = false;
ma.base.is_mem = false;
}

int4 cta = get_ctaid();
Expand All @@ -62,17 +62,17 @@ instrument_inst(int pred, int opcode_id, int32_t vpc, bool is_mem,
ma.cta_id_z = cta.z;
ma.warpid_sm = get_warpid();
ma.opcode_id = opcode_id;
ma.vpc = vpc;
ma.GPRDst = desReg;
ma.GPRSrcs[0] = srcReg1;
ma.GPRSrcs[1] = srcReg2;
ma.GPRSrcs[2] = srcReg3;
ma.GPRSrcs[3] = srcReg4;
ma.GPRSrcs[4] = srcReg5;
ma.numSrcs = srcNum;
ma.imm = immediate;
ma.active_mask = active_mask;
ma.predicate_mask = predicate_mask;
ma.base.vpc = vpc;
ma.base.GPRDst = desReg;
ma.base.GPRSrcs[0] = srcReg1;
ma.base.GPRSrcs[1] = srcReg2;
ma.base.GPRSrcs[2] = srcReg3;
ma.base.GPRSrcs[3] = srcReg4;
ma.base.GPRSrcs[4] = srcReg5;
ma.base.numSrcs = srcNum;
ma.base.imm = immediate;
ma.base.active_mask = active_mask;
ma.base.predicate_mask = predicate_mask;
ma.sm_id = get_smid();

/* first active lane pushes information on the channel */
Expand Down
Loading