diff --git a/README.md b/README.md
index 6b8eb95..d213c7d 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,8 @@
 > [!NOTE]
 > Be mindful to install the backend before the Context Chat php app (Context Chat php app would sends all the user-accessible files to the backend for indexing in the background. It is not an issue even if the request fails to an uninitialised backend since those files would be tried again in the next background job run.)
 >
+> The CPU (or the virtual CPU) should support AVX2 instructions for the embedder/LLM to work.
+>
 > The HTTP request timeout is 50 minutes for all requests and can be changed with the `request_timeout` app config for the php app `context_chat` using the occ command (`occ config:app:set context_chat request_timeout --value=3000`, value is in seconds). The same also needs to be done for docker socket proxy. See [Slow responding ExApps](https://github.com/cloud-py-api/docker-socket-proxy?tab=readme-ov-file#slow-responding-exapps)
 >
 > An end-to-end example on how to build and register the backend manually (with CUDA) is at the end of this readme
diff --git a/config.cpu.yaml b/config.cpu.yaml
index 9b97295..f16404e 100644
--- a/config.cpu.yaml
+++ b/config.cpu.yaml
@@ -20,13 +20,14 @@ embedding:
   port: 5000
   workers: 1
   offload_after_mins: 15 # in minutes
-  request_timeout: 1800 # in seconds
+  request_timeout: 1680 # in seconds
   llama:
     # 'model_alias' is reserved
     # 'embedding' is always set to True
     model: multilingual-e5-large-instruct-q6_k.gguf
-    n_batch: 16
+    n_batch: 512
     n_ctx: 8192
+    logits_all: false
 
 llm:
   nc_texttotext:
diff --git a/config.gpu.yaml b/config.gpu.yaml
index e0d00fc..31c7c49 100644
--- a/config.gpu.yaml
+++ b/config.gpu.yaml
@@ -20,14 +20,15 @@ embedding:
   port: 5000
   workers: 1
   offload_after_mins: 15 # in minutes
-  request_timeout: 1800 # in seconds
+  request_timeout: 1680 # in seconds
   llama:
     # 'model_alias' is reserved
     # 'embedding' is always set to True
     model: multilingual-e5-large-instruct-q6_k.gguf
-    n_batch: 16
+    n_batch: 512
     n_ctx: 8192
     n_gpu_layers: -1
+    logits_all: false
 
 llm:
   nc_texttotext:
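
As a rough illustration of what the changed `embedding.llama` keys control: they are llama.cpp model settings (via llama-cpp-python) used for the embedder. The sketch below is not the backend's actual code, and the model path is an assumption for the example; it only shows how `n_batch`, `n_ctx` and `logits_all` would be applied when loading the GGUF embedding model.

```python
# Minimal sketch, assuming llama-cpp-python is installed and the GGUF file
# is available locally; the path below is hypothetical.
from llama_cpp import Llama

embedder = Llama(
    model_path="model_files/multilingual-e5-large-instruct-q6_k.gguf",  # assumed location
    n_batch=512,       # larger batch speeds up embedding of long inputs
    n_ctx=8192,        # context window size
    logits_all=False,  # logits are not needed when only embeddings are produced
    embedding=True,    # always set to True by the app, per the config comment
)

vector = embedder.embed("hello world")  # returns the embedding as a list of floats
```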