From f829a12add380c78e50ecc86357d4a1b513f21f0 Mon Sep 17 00:00:00 2001 From: boyealkim Date: Thu, 25 Jul 2019 20:42:13 +0900 Subject: [PATCH 1/4] fix stream_merge front algo --- src/stream_manager.cc | 50 +++++++++++++++++++++++++++---------------- src/stream_manager.h | 12 +++++++++-- 2 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/stream_manager.cc b/src/stream_manager.cc index 6cd62a228..6e02dcdb4 100644 --- a/src/stream_manager.cc +++ b/src/stream_manager.cc @@ -194,14 +194,17 @@ bool stream_operation::do_operation( gpgpu_sim *gpu ) m_stream->record_next_done(); } break; - case stream_wait_event: { + case stream_wait_event: //only allows next op to go if event is done //otherwise stays in the stream queue printf("stream wait event processing...\n"); - if(m_event->done()) + if(m_event->done()){ printf("stream wait event done\n"); m_stream->record_next_done(); } + else{ + return false; + } break; default: abort(); @@ -232,6 +235,7 @@ stream_manager::stream_manager( gpgpu_sim *gpu, bool cuda_launch_blocking ) m_service_stream_zero = false; m_cuda_launch_blocking = cuda_launch_blocking; pthread_mutex_init(&m_lock,NULL); + m_last_stream = m_streams.begin(); } bool stream_manager::operation( bool * sim) @@ -330,22 +334,31 @@ stream_operation stream_manager::front() m_service_stream_zero = false; } } - if(!m_service_stream_zero) { - std::list::iterator s; - for( s=m_streams.begin(); s != m_streams.end(); s++) { - CUstream_st *stream = *s; - if( !stream->busy() && !stream->empty() ) { - result = stream->next(); - if( result.is_kernel() ) { - unsigned grid_id = result.get_kernel()->get_uid(); - m_grid_id_to_stream[grid_id] = stream; - } - break; - } - } - } + std::list::iterator s = m_last_stream; + if(m_last_stream == m_streams.end()){ + s = m_streams.begin(); + } + else{ + s++; + } + for(size_t ii = 0 ; ii < m_streams.size(); ii++, s++) { + if(s == m_streams.end()){ + s = m_streams.begin(); + } + m_last_stream = s; + CUstream_st *stream = *s; + if( !stream->busy() && !stream->empty() ) { + result = stream->next(); + if( result.is_kernel() ) { + unsigned grid_id = result.get_kernel()->get_uid(); + m_grid_id_to_stream[grid_id] = stream; + } + break; + } + } + } return result; } @@ -370,7 +383,8 @@ void stream_manager::destroy_stream( CUstream_st *stream ) break; } } - delete stream; + delete stream; + m_last_stream = m_streams.begin(); pthread_mutex_unlock(&m_lock); } @@ -384,7 +398,6 @@ bool stream_manager::concurrent_streams_empty() for( s=m_streams.begin(); s!=m_streams.end();++s ) { struct CUstream_st *stream = *s; if( !stream->empty() ) { - //stream->print(stdout); result = false; break; } @@ -437,7 +450,6 @@ void stream_manager::print_impl( FILE *fp) void stream_manager::push( stream_operation op ) { struct CUstream_st *stream = op.get_stream(); - // block if stream 0 (or concurrency disabled) and pending concurrent operations exist bool block= !stream || m_cuda_launch_blocking; while(block) { diff --git a/src/stream_manager.h b/src/stream_manager.h index 91d1b362d..ae1af9fa3 100644 --- a/src/stream_manager.h +++ b/src/stream_manager.h @@ -163,7 +163,6 @@ class stream_operation { void print( FILE *fp ) const; struct CUstream_st *get_stream() { return m_stream; } void set_stream( CUstream_st *stream ) { m_stream = stream; } - private: struct CUstream_st *m_stream; @@ -179,9 +178,9 @@ class stream_operation { const char *m_symbol; size_t m_offset; + struct CUevent_st *m_event; bool m_sim_mode; kernel_info_t *m_kernel; - struct CUevent_st *m_event; }; struct CUevent_st { @@ -193,6 +192,7 @@ struct CUevent_st { m_updates = 0; m_wallclock = 0; m_gpu_tot_sim_cycle = 0; + m_issued = 0; m_done = false; } void update( double cycle, time_t clk ) @@ -207,11 +207,18 @@ struct CUevent_st { unsigned num_updates() const { return m_updates; } bool done() const { return m_done; } time_t clock() const { return m_wallclock; } + void issue(){ + m_issued++; + } + unsigned int num_issued() const{ + return m_issued; + } private: int m_uid; bool m_blocking; bool m_done; int m_updates; + unsigned int m_issued; time_t m_wallclock; double m_gpu_tot_sim_cycle; @@ -268,6 +275,7 @@ class stream_manager { CUstream_st m_stream_zero; bool m_service_stream_zero; pthread_mutex_t m_lock; + std::list::iterator m_last_stream; }; #endif From 8e9f0375a6a8bed217b0634a08f7f1f0cd149744 Mon Sep 17 00:00:00 2001 From: boyealkim Date: Thu, 25 Jul 2019 20:31:21 +0900 Subject: [PATCH 2/4] fix stream fix bug and fix cudaStreamWaitEvent impl --- libcuda/cuda_runtime_api.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/libcuda/cuda_runtime_api.cc b/libcuda/cuda_runtime_api.cc index 44f0f4ea4..7005eef58 100644 --- a/libcuda/cuda_runtime_api.cc +++ b/libcuda/cuda_runtime_api.cc @@ -1696,6 +1696,7 @@ __host__ cudaError_t CUDARTAPI cudaEventRecord(cudaEvent_t event, cudaStream_t s if( !e ) return g_last_cudaError = cudaErrorUnknown; struct CUstream_st *s = (struct CUstream_st *)stream; stream_operation op(e,s); + e->issue(); g_stream_manager->push(op); return g_last_cudaError = cudaSuccess; } @@ -1708,9 +1709,13 @@ __host__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEven //reference: https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__STREAM_gfe68d207dc965685d92d3f03d77b0876.html CUevent_st *e = get_event(event); if( !e ){ - printf("GPGPU-Sim API: Warning: cudaEventRecord has not been called on event before calling cudaStreamWaitEvent.\nNothing to be done.\n"); - return g_last_cudaError = cudaSuccess; + printf("GPGPU-Sim API: Error at cudaStreamWaitEvent. Event is not created .\n"); + return g_last_cudaError = cudaErrorInvalidResourceHandle; } + else if(e->num_issued() == 0){ + printf("GPGPU-Sim API: Warning: cudaEventRecord has not been called on event before calling cudaStreamWaitEvent.\nNothing to be done.\n"); + return g_last_cudaError = cudaSuccess; + } if (!stream){ g_stream_manager->pushCudaStreamWaitEventToAllStreams(e, flags); } else { From b13557baa4ea6d1084bdcc9b28b9eb660c394427 Mon Sep 17 00:00:00 2001 From: boyealkim Date: Fri, 26 Jul 2019 11:55:45 +0900 Subject: [PATCH 3/4] fix indentation --- libcuda/cuda_runtime_api.cc | 10 +++---- src/stream_manager.cc | 58 ++++++++++++++++++------------------- src/stream_manager.h | 17 +++++------ 3 files changed, 40 insertions(+), 45 deletions(-) diff --git a/libcuda/cuda_runtime_api.cc b/libcuda/cuda_runtime_api.cc index 7005eef58..6a7826ee0 100644 --- a/libcuda/cuda_runtime_api.cc +++ b/libcuda/cuda_runtime_api.cc @@ -1709,13 +1709,13 @@ __host__ cudaError_t CUDARTAPI cudaStreamWaitEvent(cudaStream_t stream, cudaEven //reference: https://www.cs.cmu.edu/afs/cs/academic/class/15668-s11/www/cuda-doc/html/group__CUDART__STREAM_gfe68d207dc965685d92d3f03d77b0876.html CUevent_st *e = get_event(event); if( !e ){ - printf("GPGPU-Sim API: Error at cudaStreamWaitEvent. Event is not created .\n"); - return g_last_cudaError = cudaErrorInvalidResourceHandle; + printf("GPGPU-Sim API: Error at cudaStreamWaitEvent. Event is not created .\n"); + return g_last_cudaError = cudaErrorInvalidResourceHandle; } - else if(e->num_issued() == 0){ - printf("GPGPU-Sim API: Warning: cudaEventRecord has not been called on event before calling cudaStreamWaitEvent.\nNothing to be done.\n"); + else if(e->num_issued() == 0){ + printf("GPGPU-Sim API: Warning: cudaEventRecord has not been called on event before calling cudaStreamWaitEvent.\nNothing to be done.\n"); return g_last_cudaError = cudaSuccess; - } + } if (!stream){ g_stream_manager->pushCudaStreamWaitEventToAllStreams(e, flags); } else { diff --git a/src/stream_manager.cc b/src/stream_manager.cc index 6e02dcdb4..d0eecddba 100644 --- a/src/stream_manager.cc +++ b/src/stream_manager.cc @@ -198,13 +198,13 @@ bool stream_operation::do_operation( gpgpu_sim *gpu ) //only allows next op to go if event is done //otherwise stays in the stream queue printf("stream wait event processing...\n"); - if(m_event->done()){ + if(m_event->done()){ printf("stream wait event done\n"); m_stream->record_next_done(); } - else{ - return false; - } + else{ + return false; + } break; default: abort(); @@ -235,7 +235,7 @@ stream_manager::stream_manager( gpgpu_sim *gpu, bool cuda_launch_blocking ) m_service_stream_zero = false; m_cuda_launch_blocking = cuda_launch_blocking; pthread_mutex_init(&m_lock,NULL); - m_last_stream = m_streams.begin(); + m_last_stream = m_streams.begin(); } bool stream_manager::operation( bool * sim) @@ -336,29 +336,25 @@ stream_operation stream_manager::front() } if(!m_service_stream_zero) { - std::list::iterator s = m_last_stream; - if(m_last_stream == m_streams.end()){ - s = m_streams.begin(); - } - else{ - s++; - } - for(size_t ii = 0 ; ii < m_streams.size(); ii++, s++) { - if(s == m_streams.end()){ - s = m_streams.begin(); - } - m_last_stream = s; - CUstream_st *stream = *s; - if( !stream->busy() && !stream->empty() ) { - result = stream->next(); - if( result.is_kernel() ) { - unsigned grid_id = result.get_kernel()->get_uid(); - m_grid_id_to_stream[grid_id] = stream; - } - break; - } - } - } + std::list::iterator s = m_last_stream; + if(m_last_stream == m_streams.end()){ s = m_streams.begin(); } + else{ s++; } + for(size_t ii = 0 ; ii < m_streams.size(); ii++, s++) { + if(s == m_streams.end()){ + s = m_streams.begin(); + } + m_last_stream = s; + CUstream_st *stream = *s; + if( !stream->busy() && !stream->empty() ) { + result = stream->next(); + if( result.is_kernel() ) { + unsigned grid_id = result.get_kernel()->get_uid(); + m_grid_id_to_stream[grid_id] = stream; + } + break; + } + } + } return result; } @@ -383,8 +379,8 @@ void stream_manager::destroy_stream( CUstream_st *stream ) break; } } - delete stream; - m_last_stream = m_streams.begin(); + delete stream; + m_last_stream = m_streams.begin(); pthread_mutex_unlock(&m_lock); } @@ -398,6 +394,7 @@ bool stream_manager::concurrent_streams_empty() for( s=m_streams.begin(); s!=m_streams.end();++s ) { struct CUstream_st *stream = *s; if( !stream->empty() ) { + //stream->print(stdout); result = false; break; } @@ -450,6 +447,7 @@ void stream_manager::print_impl( FILE *fp) void stream_manager::push( stream_operation op ) { struct CUstream_st *stream = op.get_stream(); + // block if stream 0 (or concurrency disabled) and pending concurrent operations exist bool block= !stream || m_cuda_launch_blocking; while(block) { diff --git a/src/stream_manager.h b/src/stream_manager.h index ae1af9fa3..eb4c6ff06 100644 --- a/src/stream_manager.h +++ b/src/stream_manager.h @@ -163,6 +163,7 @@ class stream_operation { void print( FILE *fp ) const; struct CUstream_st *get_stream() { return m_stream; } void set_stream( CUstream_st *stream ) { m_stream = stream; } + private: struct CUstream_st *m_stream; @@ -178,9 +179,9 @@ class stream_operation { const char *m_symbol; size_t m_offset; - struct CUevent_st *m_event; bool m_sim_mode; kernel_info_t *m_kernel; + struct CUevent_st *m_event; }; struct CUevent_st { @@ -192,7 +193,7 @@ struct CUevent_st { m_updates = 0; m_wallclock = 0; m_gpu_tot_sim_cycle = 0; - m_issued = 0; + m_issued = 0; m_done = false; } void update( double cycle, time_t clk ) @@ -207,18 +208,14 @@ struct CUevent_st { unsigned num_updates() const { return m_updates; } bool done() const { return m_done; } time_t clock() const { return m_wallclock; } - void issue(){ - m_issued++; - } - unsigned int num_issued() const{ - return m_issued; - } + void issue(){ m_issued++; } + unsigned int num_issued() const{ return m_issued; } private: int m_uid; bool m_blocking; bool m_done; int m_updates; - unsigned int m_issued; + unsigned int m_issued; time_t m_wallclock; double m_gpu_tot_sim_cycle; @@ -275,7 +272,7 @@ class stream_manager { CUstream_st m_stream_zero; bool m_service_stream_zero; pthread_mutex_t m_lock; - std::list::iterator m_last_stream; + std::list::iterator m_last_stream; }; #endif From f3ec23390a0798eab1426adf962487680ea89e93 Mon Sep 17 00:00:00 2001 From: boyealkim Date: Sat, 27 Jul 2019 03:54:44 +0900 Subject: [PATCH 4/4] fix event done impl --- src/stream_manager.cc | 2 +- src/stream_manager.h | 80 +++++++++++++++++++++---------------------- 2 files changed, 41 insertions(+), 41 deletions(-) diff --git a/src/stream_manager.cc b/src/stream_manager.cc index d0eecddba..0d67e108c 100644 --- a/src/stream_manager.cc +++ b/src/stream_manager.cc @@ -198,7 +198,7 @@ bool stream_operation::do_operation( gpgpu_sim *gpu ) //only allows next op to go if event is done //otherwise stays in the stream queue printf("stream wait event processing...\n"); - if(m_event->done()){ + if(m_event->num_updates()>=m_cnt){ printf("stream wait event done\n"); m_stream->record_next_done(); } diff --git a/src/stream_manager.h b/src/stream_manager.h index eb4c6ff06..e73d71a9d 100644 --- a/src/stream_manager.h +++ b/src/stream_manager.h @@ -43,6 +43,45 @@ // unsigned m_pending_streams; //}; +struct CUevent_st { +public: + CUevent_st( bool blocking ) + { + m_uid = ++m_next_event_uid; + m_blocking = blocking; + m_updates = 0; + m_wallclock = 0; + m_gpu_tot_sim_cycle = 0; + m_issued = 0; + m_done = false; + } + void update( double cycle, time_t clk ) + { + m_updates++; + m_wallclock=clk; + m_gpu_tot_sim_cycle=cycle; + m_done = true; + } + //void set_done() { assert(!m_done); m_done=true; } + int get_uid() const { return m_uid; } + unsigned num_updates() const { return m_updates; } + bool done() const { return m_updates==m_issued; } + time_t clock() const { return m_wallclock; } + void issue(){ m_issued++; } + unsigned int num_issued() const{ return m_issued; } +private: + int m_uid; + bool m_blocking; + bool m_done; + int m_updates; + unsigned int m_issued; + time_t m_wallclock; + double m_gpu_tot_sim_cycle; + + static int m_next_event_uid; +}; + + enum stream_operation_type { stream_no_op, stream_memcpy_host_to_device, @@ -107,6 +146,7 @@ class stream_operation { m_kernel=NULL; m_type=stream_wait_event; m_event=e; + m_cnt = m_event->num_issued(); m_stream=stream; m_done=false; } @@ -163,7 +203,6 @@ class stream_operation { void print( FILE *fp ) const; struct CUstream_st *get_stream() { return m_stream; } void set_stream( CUstream_st *stream ) { m_stream = stream; } - private: struct CUstream_st *m_stream; @@ -183,45 +222,6 @@ class stream_operation { kernel_info_t *m_kernel; struct CUevent_st *m_event; }; - -struct CUevent_st { -public: - CUevent_st( bool blocking ) - { - m_uid = ++m_next_event_uid; - m_blocking = blocking; - m_updates = 0; - m_wallclock = 0; - m_gpu_tot_sim_cycle = 0; - m_issued = 0; - m_done = false; - } - void update( double cycle, time_t clk ) - { - m_updates++; - m_wallclock=clk; - m_gpu_tot_sim_cycle=cycle; - m_done = true; - } - //void set_done() { assert(!m_done); m_done=true; } - int get_uid() const { return m_uid; } - unsigned num_updates() const { return m_updates; } - bool done() const { return m_done; } - time_t clock() const { return m_wallclock; } - void issue(){ m_issued++; } - unsigned int num_issued() const{ return m_issued; } -private: - int m_uid; - bool m_blocking; - bool m_done; - int m_updates; - unsigned int m_issued; - time_t m_wallclock; - double m_gpu_tot_sim_cycle; - - static int m_next_event_uid; -}; - struct CUstream_st { public: CUstream_st();