Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change addresses to use 64 bits. Fixes errors with generic addressing. #200

Open
wants to merge 1 commit into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/abstract_hardware_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ enum AdaptiveCache { FIXED = 0, ADAPTIVE_VOLTA = 1 };

typedef unsigned long long new_addr_type;
typedef unsigned long long cudaTextureObject_t;
typedef unsigned address_type;
typedef unsigned addr_t;
typedef unsigned long long address_type;
typedef unsigned long long addr_t;

// the following are operations the timing model can see
#define SPECIALIZED_UNIT_NUM 8
Expand Down
50 changes: 25 additions & 25 deletions src/cuda-sim/instructions.cc
Original file line number Diff line number Diff line change
Expand Up @@ -398,8 +398,8 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op,
// global memory - g[4], g[$r0]
mem = thread->get_global_memory();
type_info_key::type_decode(opType, size, t);
mem->read(result.u32, size / 8, &finalResult.u128);
thread->m_last_effective_address = result.u32;
mem->read(result.u64, size / 8, &finalResult.u128);
thread->m_last_effective_address = result.u64;
thread->m_last_memory_space = global_space;

if (opType == S16_TYPE || opType == S32_TYPE)
Expand All @@ -408,8 +408,8 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op,
// shared memory - s[4], s[$r0]
mem = thread->m_shared_mem;
type_info_key::type_decode(opType, size, t);
mem->read(result.u32, size / 8, &finalResult.u128);
thread->m_last_effective_address = result.u32;
mem->read(result.u64, size / 8, &finalResult.u128);
thread->m_last_effective_address = result.u64;
thread->m_last_memory_space = shared_space;

if (opType == S16_TYPE || opType == S32_TYPE)
Expand All @@ -418,18 +418,18 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op,
// const memory - ce0c1[4], ce0c1[$r0]
mem = thread->get_global_memory();
type_info_key::type_decode(opType, size, t);
mem->read((result.u32 + op.get_const_mem_offset()), size / 8,
mem->read((result.u64 + op.get_const_mem_offset()), size / 8,
&finalResult.u128);
thread->m_last_effective_address = result.u32;
thread->m_last_effective_address = result.u64;
thread->m_last_memory_space = const_space;
if (opType == S16_TYPE || opType == S32_TYPE)
sign_extend(finalResult, size, dstInfo);
} else if ((op.get_addr_space() == local_space) && (derefFlag)) {
// local memory - l0[4], l0[$r0]
mem = thread->m_local_mem;
type_info_key::type_decode(opType, size, t);
mem->read(result.u32, size / 8, &finalResult.u128);
thread->m_last_effective_address = result.u32;
mem->read(result.u64, size / 8, &finalResult.u128);
thread->m_last_effective_address = result.u64;
thread->m_last_memory_space = local_space;
if (opType == S16_TYPE || opType == S32_TYPE)
sign_extend(finalResult, size, dstInfo);
Expand Down Expand Up @@ -748,8 +748,8 @@ void ptx_thread_info::set_operand_value(const operand_info &dst,
mem = thread->get_global_memory();
type_info_key::type_decode(type, size, t);

mem->write(dstData.u32, size / 8, &data.u128, thread, pI);
thread->m_last_effective_address = dstData.u32;
mem->write(dstData.u64, size / 8, &data.u128, thread, pI);
thread->m_last_effective_address = dstData.u64;
thread->m_last_memory_space = global_space;
}

Expand All @@ -759,8 +759,8 @@ void ptx_thread_info::set_operand_value(const operand_info &dst,
mem = thread->m_shared_mem;
type_info_key::type_decode(type, size, t);

mem->write(dstData.u32, size / 8, &data.u128, thread, pI);
thread->m_last_effective_address = dstData.u32;
mem->write(dstData.u64, size / 8, &data.u128, thread, pI);
thread->m_last_effective_address = dstData.u64;
thread->m_last_memory_space = shared_space;
}

Expand All @@ -770,8 +770,8 @@ void ptx_thread_info::set_operand_value(const operand_info &dst,
mem = thread->m_local_mem;
type_info_key::type_decode(type, size, t);

mem->write(dstData.u32, size / 8, &data.u128, thread, pI);
thread->m_last_effective_address = dstData.u32;
mem->write(dstData.u64, size / 8, &data.u128, thread, pI);
thread->m_last_effective_address = dstData.u64;
thread->m_last_memory_space = local_space;
}

Expand Down Expand Up @@ -3367,7 +3367,7 @@ void ld_exec(const ptx_instruction *pI, ptx_thread_info *thread) {
unsigned vector_spec = pI->get_vector();

memory_space *mem = NULL;
addr_t addr = src1_data.u32;
addr_t addr = src1_data.u64;

decode_space(space, thread, src1, mem, addr);

Expand Down Expand Up @@ -3440,7 +3440,7 @@ void mma_st_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) {
memory_space_t space = pI->get_space();

memory_space *mem = NULL;
addr_t addr = addr_reg.u32;
addr_t addr = addr_reg.u64;

new_addr_type mem_txn_addr[MAX_ACCESSES_PER_INSN_PER_THREAD];
int num_mem_txn = 0;
Expand All @@ -3455,7 +3455,7 @@ void mma_st_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) {
type_info_key::type_decode(type, size, t);
if (core->get_gpu()->gpgpu_ctx->debug_tensorcore)
printf("mma_st: thrd=%d, addr=%x, fp(size=%zu), stride=%d\n", thrd,
addr_reg.u32, size, src2_data.u32);
addr_reg.u64, size, src2_data.u32);
addr_t new_addr =
addr + thread_group_offset(thrd, wmma_type, wmma_layout, type, stride) *
size / 8;
Expand Down Expand Up @@ -3560,7 +3560,7 @@ void mma_ld_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) {
memory_space_t space = pI->get_space();

memory_space *mem = NULL;
addr_t addr = src1_data.u32;
addr_t addr = src1_data.u64;
smid = thread->get_hw_sid();
if (whichspace(addr) == shared_space) {
addr = generic_to_shared(smid, addr);
Expand All @@ -3573,7 +3573,7 @@ void mma_ld_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) {
ptx_reg_t data[16];
if (core->get_gpu()->gpgpu_ctx->debug_tensorcore)
printf("mma_ld: thrd=%d,addr=%x, fpsize=%zu, stride=%d\n", thrd,
src1_data.u32, size, src2_data.u32);
src1_data.u64, size, src2_data.u32);

addr_t new_addr =
addr + thread_group_offset(thrd, wmma_type, wmma_layout, type, stride) *
Expand Down Expand Up @@ -5667,7 +5667,7 @@ void sst_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
memory_space_t space = pI->get_space();
memory_space *mem = NULL;
addr_t addr =
src2_data.u32 * 4; // this assumes sstarr memory starts at address 0
src2_data.u64 * 4; // this assumes sstarr memory starts at address 0
ptx_cta_info *cta_info = thread->m_cta_info;

decode_space(space, thread, src1, mem, addr);
Expand Down Expand Up @@ -5707,7 +5707,7 @@ void sst_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
// squeeze the zeros out of the array and store data back into original
// array
mem = NULL;
addr = src1_data.u32;
addr = src1_data.u64;
space.set_type(global_space);
decode_space(space, thread, src1, mem, addr);
// store nonzero entries and indices
Expand Down Expand Up @@ -5754,7 +5754,7 @@ void st_impl(const ptx_instruction *pI, ptx_thread_info *thread) {
unsigned vector_spec = pI->get_vector();

memory_space *mem = NULL;
addr_t addr = addr_reg.u32;
addr_t addr = addr_reg.u64;

decode_space(space, thread, dst, mem, addr);

Expand Down Expand Up @@ -6490,13 +6490,13 @@ ptx_reg_t srcOperandModifiers(ptx_reg_t opData, operand_info opInfo,
if (opInfo.get_addr_space() == global_space) {
mem = thread->get_global_memory();
type_info_key::type_decode(type, size, t);
mem->read(opData.u32, size / 8, &result.u64);
mem->read(opData.u64, size / 8, &result.u64);
if (type == S16_TYPE || type == S32_TYPE)
sign_extend(result, size, dstInfo);
} else if (opInfo.get_addr_space() == shared_space) {
mem = thread->m_shared_mem;
type_info_key::type_decode(type, size, t);
mem->read(opData.u32, size / 8, &result.u64);
mem->read(opData.u64, size / 8, &result.u64);

if (type == S16_TYPE || type == S32_TYPE)
sign_extend(result, size, dstInfo);
Expand All @@ -6505,7 +6505,7 @@ ptx_reg_t srcOperandModifiers(ptx_reg_t opData, operand_info opInfo,
mem = thread->get_global_memory();
type_info_key::type_decode(type, size, t);

mem->read((opData.u32 + opInfo.get_const_mem_offset()), size / 8,
mem->read((opData.u64 + opInfo.get_const_mem_offset()), size / 8,
&result.u64);

if (type == S16_TYPE || type == S32_TYPE)
Expand Down