diff --git a/common/args.py b/common/args.py index 14508a7c..6bb0804f 100644 --- a/common/args.py +++ b/common/args.py @@ -17,13 +17,15 @@ def init_argparser(): """Creates an argument parser that any function can use""" parser = argparse.ArgumentParser( - epilog="These args are only for a subset of the config. " - + "Please edit config.yml for all options!" + epilog="NOTE: These args serve to override parts of the config. " + + "It's highly recommended to edit config.yml for all options and " + + "better descriptions!" ) add_network_args(parser) add_model_args(parser) add_logging_args(parser) add_developer_args(parser) + add_sampling_args(parser) add_config_args(parser) return parser @@ -64,6 +66,11 @@ def add_network_args(parser: argparse.ArgumentParser): type=str_to_bool, help="Disable HTTP token authenticaion with requests", ) + network_group.add_argument( + "--send-tracebacks", + type=str_to_bool, + help="Decide whether to send error tracebacks over the API", + ) def add_model_args(parser: argparse.ArgumentParser): @@ -74,6 +81,17 @@ def add_model_args(parser: argparse.ArgumentParser): "--model-dir", type=str, help="Overrides the directory to look for models" ) model_group.add_argument("--model-name", type=str, help="An initial model to load") + model_group.add_argument( + "--use-dummy-models", + type=str_to_bool, + help="Add dummy OAI model names for API queries", + ) + model_group.add_argument( + "--use-as-default", + type=str, + nargs="+", + help="Names of args to use as a default fallback for API load requests ", + ) model_group.add_argument( "--max-seq-len", type=int, help="Override the maximum model sequence length" ) @@ -83,30 +101,51 @@ def add_model_args(parser: argparse.ArgumentParser): help="Overrides base model context length", ) model_group.add_argument( - "--cache-size", + "--gpu-split-auto", + type=str_to_bool, + help="Automatically allocate resources to GPUs", + ) + model_group.add_argument( + "--autosplit-reserve", type=int, - help="The size of the prompt cache (in number of tokens) to allocate", + nargs="+", + help="Reserve VRAM used for autosplit loading (in MBs) ", + ) + model_group.add_argument( + "--gpu-split", + type=float, + nargs="+", + help="An integer array of GBs of vram to split between GPUs. " + + "Ignored if gpu_split_auto is true", ) model_group.add_argument( "--rope-scale", type=float, help="Sets rope_scale or compress_pos_emb" ) model_group.add_argument("--rope-alpha", type=float, help="Sets rope_alpha for NTK") model_group.add_argument( - "--prompt-template", + "--cache-mode", type=str, - help="Set the prompt template for chat completions", + help="Set the quantization level of the K/V cache. Options: (FP16, Q8, Q6, Q4)", ) model_group.add_argument( - "--gpu-split-auto", - type=str_to_bool, - help="Automatically allocate resources to GPUs", + "--cache-size", + type=int, + help="The size of the prompt cache (in number of tokens) to allocate", ) model_group.add_argument( - "--gpu-split", - type=float, - nargs="+", - help="An integer array of GBs of vram to split between GPUs. " - + "Ignored if gpu_split_auto is true", + "--chunk-size", + type=int, + help="Chunk size for prompt ingestion", + ) + model_group.add_argument( + "--max-batch-size", + type=int, + help="Maximum amount of prompts to process at one time", + ) + model_group.add_argument( + "--prompt-template", + type=str, + help="Set the jinja2 prompt template for chat completions", ) model_group.add_argument( "--num-experts-per-token", @@ -114,9 +153,9 @@ def add_model_args(parser: argparse.ArgumentParser): help="Number of experts to use per token in MoE models", ) model_group.add_argument( - "--use-cfg", + "--fasttensors", type=str_to_bool, - help="Enables CFG support", + help="Possibly increases model loading speeds", ) @@ -151,3 +190,12 @@ def add_developer_args(parser: argparse.ArgumentParser): type=str_to_bool, help="Disables API request streaming", ) + + +def add_sampling_args(parser: argparse.ArgumentParser): + """Adds sampling-specific arguments""" + + sampling_group = parser.add_argument_group("sampling") + sampling_group.add_argument( + "--override-preset", type=str, help="Select a sampler override preset" + )