Make some small fixes
jart committed Jan 3, 2024
1 parent 6b66db4 · commit 190f96f
Showing 6 changed files with 13 additions and 10 deletions.
llama.cpp/common.cpp (2 changes: 1 addition & 1 deletion)
@@ -563,7 +563,7 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
             }
             params.n_gpu_layers = std::stoi(argv[i]);
             if (params.n_gpu_layers == 0) {
-                FLAG_gpu = LLAMAFILE_GPU_DISABLED;
+                FLAG_gpu = LLAMAFILE_GPU_DISABLE;
             }
         } else if (arg == "--gpu-layers-draft" || arg == "-ngld" || arg == "--n-gpu-layers-draft") {
             passed_gpu_flags = true;
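
In CLI terms, this hunk means that passing -ngl 0 now sets the renamed constant, which (per the man page change below) is the same as requesting --gpu DISABLE. A hypothetical invocation, with the model path as a placeholder:

    llamafile -m model.gguf -ngl 0    # offload zero layers; implies GPU disabled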
llama.cpp/ggml-cuda.cu (2 changes: 2 additions & 0 deletions)
@@ -59,6 +59,8 @@
 #define cudaGetDevice hipGetDevice
 #define cudaGetDeviceCount hipGetDeviceCount
 #define cudaGetDeviceProperties hipGetDeviceProperties
+#define cudaDeviceGetAttribute hipDeviceGetAttribute
+#define cudaDevAttrMultiProcessorCount hipDeviceAttributeMultiprocessorCount
 #define cudaGetErrorString hipGetErrorString
 #define cudaGetLastError hipGetLastError
 #define cudaMalloc hipMalloc
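
These two aliases extend the table that lets this CUDA source build unchanged against AMD's HIP runtime: in a hipBLAS build the preprocessor rewrites the CUDA spellings to their HIP equivalents. A minimal sketch of the pattern the new defines enable (the helper function is illustrative, not from the repository):

    #include <cuda_runtime.h>

    // Query the streaming-multiprocessor count via the CUDA names. In a
    // HIP build, the #define table above turns this call into
    // hipDeviceGetAttribute(&sm_count, hipDeviceAttributeMultiprocessorCount, device).
    static int multiprocessor_count(int device) {
        int sm_count = 0;
        cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device);
        return sm_count;
    }

One source file thus serves both vendors, with the preprocessor doing the porting.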
llama.cpp/main/main.1 (6 changes: 3 additions & 3 deletions)
@@ -404,7 +404,7 @@ flag passed. The GGML library will then be compiled and saved to
 .Pa ~/.llamafile/
 so the special process only needs to happen a single time.
 .It
-.Ar DISABLED :
+.Ar DISABLE :
 Never use GPU and instead use CPU inference. This setting is implied by
 .Fl ngl Ar 0 .
 .El
@@ -485,6 +485,8 @@ generating text. For example, the grammar:
 .Pp
 will force the LLM to only output yes or no before exiting. This is
 useful for shell scripts when the
+.Fl Fl silent-prompt
+flag is also supplied.
 .It Fl Fl grammar-file Ar FNAME
 File to read grammar from.
 .It Fl Fl prompt-cache Ar FNAME
@@ -512,8 +514,6 @@ Run in interactive mode.
 Run in interactive mode and wait for input right away.
 .It Fl ins , Fl Fl instruct
 Run in instruction mode (use with Alpaca models).
-.Fl Fl silent-prompt
-flag is also supplied.
 .It Fl r Ar PROMPT , Fl Fl reverse-prompt Ar PROMPT
 Halt generation at
 .Ar PROMPT
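
Together, these two hunks move the stray --silent-prompt sentence out of the instruct-mode entry and back to the grammar discussion where it belongs: the grammar constrains the output, and --silent-prompt suppresses the echoed prompt so a script captures only the answer. A hypothetical invocation combining them (model path and prompt are placeholders; the flags are the ones this page documents):

    llamafile -m model.gguf --silent-prompt \
        --grammar 'root ::= "yes" | "no"' \
        -p 'Is the sky blue? Answer yes or no:'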
llama.cpp/main/main.1.asc (6 changes: 3 additions & 3 deletions)
@@ -371,7 +371,7 @@ OOPPTTIIOONNSS
               be compiled and saved to _~_/_._l_l_a_m_a_f_i_l_e_/ so the special process
               only needs to happen a single time.
 
-             -- _D_I_S_A_B_L_E_D: Never use GPU and instead use CPU inference. This
+             -- _D_I_S_A_B_L_E: Never use GPU and instead use CPU inference. This
               setting is implied by --nnggll _0.
 
       --nnggll _N, ----nn--ggppuu--llaayyeerrss _N
@@ -468,7 +468,8 @@ CCLLII OOPPTTIIOONNSS
           root ::= "yes" | "no"
 
       will force the LLM to only output yes or no before exiting. This
-      is useful for shell scripts when the
+      is useful for shell scripts when the ----ssiilleenntt--pprroommpptt flag is also
+      supplied.
 
 ----ggrraammmmaarr--ffiillee _F_N_A_M_E
       File to read grammar from.
@@ -503,7 +504,6 @@ CCLLII OOPPTTIIOONNSS
 --iinnss, ----iinnssttrruucctt
       Run in instruction mode (use with Alpaca models).
-      ----ssiilleenntt--pprroommpptt flag is also supplied.
 --rr _P_R_O_M_P_T, ----rreevveerrssee--pprroommpptt _P_R_O_M_P_T
       Halt generation at _P_R_O_M_P_T and return control in interactive mode
llamafile/gpu.c (5 changes: 3 additions & 2 deletions)
@@ -38,7 +38,7 @@ static const char *describe_required_gpu(void) {
         return "apple";
     case LLAMAFILE_GPU_NVIDIA:
         return "nvidia";
-    case LLAMAFILE_GPU_DISABLED:
+    case LLAMAFILE_GPU_DISABLE:
         return "disabled";
     default:
         __builtin_unreachable();
@@ -78,13 +78,14 @@ int llamafile_gpu_supported(void) {
 int llamafile_gpu_parse(const char *s) {
 
     // Parse canonical names for GPUs.
-    if (!strcasecmp(s, "disabled")) return LLAMAFILE_GPU_DISABLED;
+    if (!strcasecmp(s, "disable")) return LLAMAFILE_GPU_DISABLE;
     if (!strcasecmp(s, "auto")) return LLAMAFILE_GPU_AUTO;
     if (!strcasecmp(s, "amd")) return LLAMAFILE_GPU_AMD;
     if (!strcasecmp(s, "apple")) return LLAMAFILE_GPU_APPLE;
     if (!strcasecmp(s, "nvidia")) return LLAMAFILE_GPU_NVIDIA;
 
     // Parse aliases.
+    if (!strcasecmp(s, "disabled")) return LLAMAFILE_GPU_DISABLE;
     if (!strcasecmp(s, "metal")) return LLAMAFILE_GPU_APPLE;
     if (!strcasecmp(s, "cublas")) return LLAMAFILE_GPU_NVIDIA;
     if (!strcasecmp(s, "rocblas")) return LLAMAFILE_GPU_AMD;
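
The rename stays backward compatible: the old spelling drops to the alias section, so both forms resolve to the same constant, and strcasecmp() makes every match case-insensitive. A quick sketch of what callers can expect (illustrative asserts, assuming the parser is declared in llamafile/llamafile.h alongside the other llamafile_* functions):

    #include <assert.h>
    #include "llamafile/llamafile.h"

    int main(void) {
        assert(llamafile_gpu_parse("disable") == LLAMAFILE_GPU_DISABLE);   // canonical name
        assert(llamafile_gpu_parse("DISABLED") == LLAMAFILE_GPU_DISABLE);  // legacy alias
        assert(llamafile_gpu_parse("Metal") == LLAMAFILE_GPU_APPLE);       // case-insensitive alias
        return 0;
    }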
llamafile/llamafile.h (2 changes: 1 addition & 1 deletion)
@@ -27,7 +27,7 @@ void llamafile_schlep(const void *, size_t);
 void llamafile_get_app_dir(char *, size_t);
 bool llamafile_launch_browser(const char *);
 
-#define LLAMAFILE_GPU_DISABLED -1
+#define LLAMAFILE_GPU_DISABLE -1
 #define LLAMAFILE_GPU_AUTO 0
 #define LLAMAFILE_GPU_AMD 1
 #define LLAMAFILE_GPU_APPLE 2
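
The value scheme is easy to read off the defines: the disable sentinel is the only negative value, zero defers to auto-detection, and specific backends are positive. A hypothetical helper (not from the repository) showing that reading:

    #include <stdbool.h>

    // True when the user pinned a particular backend, as opposed to
    // disabling GPU (-1) or leaving the choice to auto-detection (0).
    static bool gpu_explicitly_selected(int flag) {
        return flag > LLAMAFILE_GPU_AUTO;
    }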
