-
Notifications
You must be signed in to change notification settings - Fork 0
/
flake.nix
58 lines (54 loc) · 2.65 KB
/
flake.nix
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
{
# Flake-level Nix settings: one additional substituter URL and one additional
# trusted public key (presumably the signing key for that cache; confirm).
nixConfig = {
extra-trusted-public-keys = "replicate-1:rbU0MI8kgUmqLINtKfXoDkrl9NxXQMw6//+LHHDYflk=";
extra-substituters = "https://storage.googleapis.com/replicate-nix-cache-dev/";
};
# The only flake input: cognix, fetched from github:datakami/cognix at ref 24.03.
inputs = {
cognix.url = "github:datakami/cognix/24.03";
};
outputs = { self, cognix }@inputs: (cognix.lib.cognixFlake inputs {}) // {
packages.x86_64-linux = let
# Revision of this flake's source: the clean `rev` if present, otherwise
# `dirtyRev`, otherwise null when neither attribute exists.
sourceRev =
  if self ? sourceInfo.rev then self.sourceInfo.rev
  else if self ? sourceInfo.dirtyRev then self.sourceInfo.dirtyRev
  else null;
# Wrap cognix's x86_64-linux `callCognix` with settings shared by every package
# in this flake. `userModule` is imported into a base module that:
#   - sets paths.projectRoot to this flake (`self`)
#   - adds the "org.opencontainers.image.revision" label to the image config,
#     but only when a source revision is known (sourceRev != null)
callCognix = userModule:
  let
    baseModule = { lib, ... }: {
      imports = [ userModule ];
      paths.projectRoot = self;
      dockerTools.streamLayeredImage.config = lib.mkIf (sourceRev != null) {
        Labels = {
          "org.opencontainers.image.revision" = sourceRev;
        };
      };
    };
  in
  cognix.legacyPackages.x86_64-linux.callCognix baseModule "${self}";
# Build a runner package through callCognix.
#   name          - package name
#   architectures - list assigned to cog-triton.architectures, e.g. [ "86-real" ]
#   env           - attrset of extra environment variables; merged into
#                   cognix.environment, with its entries winning on key collisions.
#                   Pass {} for no extras.
makeRunner = name: architectures: env: callCognix ({ config, lib, ... }: {
  inherit name;
  # only grab deps of nvidia-pytriton, transformers
  cognix.python_root_packages = [ "nvidia-pytriton" "transformers" "tokenizers" ];
  # Fixed entries first, then the caller-supplied `env`, so that callers can
  # add variables such as COG_WEIGHTS (or override the default below).
  cognix.environment = {
    TRITONSERVER_BACKEND_DIR = "${config.deps.backend_dir}/backends";
  } // env;
  cog-triton.architectures = architectures;
  # don't need this file in a runner
  python-env.pip.drvs.tensorrt-libs.mkDerivation.postInstall = lib.mkAfter ''
    rm $out/lib/python*/site-packages/tensorrt_libs/libnvinfer_builder_resource*
  '';
});
# Build the builder package through callCognix.
#   name - package name
makeBuilder = name: callCognix ({ config, lib, ... }: {
  inherit name;
  # override cog.yaml: force the maximum concurrency to 1
  cog.concurrency.max = lib.mkForce 1;
  cognix = {
    # only grab deps of tensorrt-llm, omegaconf, hf-transfer
    python_root_packages = [ "tensorrt-llm" "omegaconf" "hf-transfer" ];
    # override cog.yaml: use the ./cog-trt-llm directory as the root path
    rootPath = lib.mkForce "${./cog-trt-llm}";
    # this just needs the examples/ dir
    environment.TRTLLM_DIR = config.deps.tensorrt-llm.examples;
  };
});
in {
# Builder package, produced by makeBuilder.
cog-triton-builder = makeBuilder "cog-triton-builder";
# Runner packages, one per architecture list handed to cog-triton.architectures
# (the "NN-real" strings look like CUDA architecture targets; confirm). Each passes
# an empty attrset as makeRunner's third (env) argument.
cog-triton-runner-80 = makeRunner "cog-triton-runner-80" ["80-real"] {};
cog-triton-runner-86 = makeRunner "cog-triton-runner-86" ["86-real"] {};
cog-triton-runner-90 = makeRunner "cog-triton-runner-90" ["90-real"] {};
# mistral example, update for new engine
# default = self.packages.x86_64-linux.cog-triton-mistral-7b;
# cog-triton-mistral-7b = makeRunner "cog-triton-mistral-7b" ["86-real"] {
# COG_WEIGHTS = "https://replicate.delivery/pbxt/9yf58OhSA5VZCCiflRRmgfVSnxujfuLfXSk6P24Yyu54Db7TC/engine.tar";
# SYSTEM_PROMPT = "You are a very helpful, respectful and honest assistant.";
# PROMPT_TEMPLATE = "<s>[INST] {system_prompt} {prompt} [/INST]";
# };
};
};
}