Skip to content

Commit

Permalink
Sleep before ReadBuffer to resolve Nvidia's busywait issue
Browse files Browse the repository at this point in the history
I did not observe any negative influence on performance with this patch.
The CPU core usage is reduced to about 8% from 100%.

I am getting consistent 38Sols/s on MSI Gaming X 1070 on one instance
while testing. Sols/s on AMD cards (RX480) are not affected.

The clFlush is required because AMD will only start working when there is a
blocking operation waiting, which is delayed in this case.

This is only a suggestion and a solution that works for me; it isn't perfect,
but it works. With this, silentarmy is the best miner for Nvidia
that I know.

Don't feel obligated to accept this PR, you can use parts of it or it in
whole to engineer a solution that you think is the best.

I have tried the sched_yield trick; unfortunately, it seems to have stopped
working.
  • Loading branch information
Kubuxu committed Nov 11, 2016
1 parent 243ed56 commit 162cdef
Showing 1 changed file with 40 additions and 3 deletions.
43 changes: 40 additions & 3 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <unistd.h>
#include <getopt.h>
#include <errno.h>
#include <time.h>
#include <CL/cl.h>
#include "blake.h"
#include "_kernel.h"
Expand All @@ -31,6 +32,7 @@ uint64_t nr_nonces = 1;
uint32_t do_list_devices = 0;
uint32_t gpu_to_use = 0;
uint32_t mining = 0;
struct timespec kern_avg_run_time;

typedef struct debug_s
{
Expand Down Expand Up @@ -113,6 +115,19 @@ void randomize(void *p, ssize_t l)
fatal("close %s: %s\n", fname, strerror(errno));
}

/*
** Return the elapsed interval `end - start` as a timespec.
**
** The seconds and nanoseconds fields are subtracted independently; when the
** nanosecond difference comes out negative, one second is borrowed so that
** the returned tv_nsec is non-negative.
**
** NOTE(review): assumes both inputs are normalized (0 <= tv_nsec < 1e9);
** no additional normalization is attempted here.
*/
struct timespec time_diff(struct timespec start, struct timespec end)
{
    struct timespec delta;

    delta.tv_sec = end.tv_sec - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;
    if (delta.tv_nsec < 0) {
        /* Borrow one second to bring the nanosecond field back in range. */
        delta.tv_sec -= 1;
        delta.tv_nsec += 1000000000;
    }
    return delta;
}

cl_mem check_clCreateBuffer(cl_context ctx, cl_mem_flags flags, size_t size,
void *host_ptr)
{
Expand Down Expand Up @@ -774,13 +789,15 @@ uint32_t verify_sol(sols_t *sols, unsigned sol_i)
*/
uint32_t verify_sols(cl_command_queue queue, cl_mem buf_sols, uint64_t *nonce,
uint8_t *header, size_t fixed_nonce_bytes, uint8_t *target,
char *job_id, uint32_t *shares)
char *job_id, uint32_t *shares, struct timespec *start_time)
{
sols_t *sols;
uint32_t nr_valid_sols;
sols = (sols_t *)malloc(sizeof (*sols));
if (!sols)
fatal("malloc: %s\n", strerror(errno));
fatal("malloc: %s\n", strerror(errno));

nanosleep(&kern_avg_run_time, NULL);
check_clEnqueueReadBuffer(queue, buf_sols,
CL_TRUE, // cl_bool blocking_read
0, // size_t offset
Expand All @@ -789,6 +806,22 @@ uint32_t verify_sols(cl_command_queue queue, cl_mem buf_sols, uint64_t *nonce,
0, // cl_uint num_events_in_wait_list
NULL, // cl_event *event_wait_list
NULL); // cl_event *event
struct timespec curr_time;
clock_gettime(CLOCK_MONOTONIC, &curr_time);

struct timespec t_diff = time_diff(*start_time, curr_time);

double a_diff = t_diff.tv_sec * 1e9 + t_diff.tv_nsec;
double kern_avg = kern_avg_run_time.tv_sec * 1e9 + kern_avg_run_time.tv_nsec;
if (kern_avg == 0)
kern_avg = a_diff;
else
kern_avg = kern_avg * 70 / 100 + a_diff * 28 / 100; // it is 2% less than average
// thus allowing time to reduce

kern_avg_run_time.tv_sec = (time_t)(kern_avg / 1e9);
kern_avg_run_time.tv_nsec = ((long)kern_avg) % 1000000000;

if (sols->nr > MAX_SOLS)
{
fprintf(stderr, "%d (probably invalid) solutions were dropped!\n",
Expand Down Expand Up @@ -899,10 +932,14 @@ uint32_t solve_equihash(cl_context ctx, cl_command_queue queue,
check_clSetKernelArg(k_sols, 1, &buf_ht[1]);
check_clSetKernelArg(k_sols, 2, &buf_sols);
global_ws = NR_ROWS;

struct timespec start_time;
clock_gettime(CLOCK_MONOTONIC, &start_time);
check_clEnqueueNDRangeKernel(queue, k_sols, 1, NULL,
&global_ws, &local_work_size, 0, NULL, NULL);
clFlush(queue);
sol_found = verify_sols(queue, buf_sols, nonce_ptr, header,
fixed_nonce_bytes, target, job_id, shares);
fixed_nonce_bytes, target, job_id, shares, &start_time);
clReleaseMemObject(buf_blake_st);
return sol_found;
}
Expand Down

0 comments on commit 162cdef

Please sign in to comment.