From f6426036fd0577a093332b9253faae564c81b7dd Mon Sep 17 00:00:00 2001 From: digger yu Date: Tue, 6 Feb 2024 10:42:46 +0800 Subject: [PATCH] fix some typo --- README_EN.md | 8 ++++---- demo/run_demo.py | 8 ++++---- evaluation/utils.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/README_EN.md b/README_EN.md index 5686205..b9873b9 100644 --- a/README_EN.md +++ b/README_EN.md @@ -18,7 +18,7 @@ CodeGeeX2 is the second-generation model of the multilingual code generation mod * **More Powerful Coding Capabilities**: Based on the ChatGLM2-6B model, CodeGeeX2-6B has been further pre-trained on 600B code tokens, which has been comprehensively improved in coding capability compared to the first-generation. On the [HumanEval-X](https://huggingface.co/datasets/THUDM/humaneval-x) benchmark, all six languages have been significantly improved (Python +57%, C++ +71%, Java +54%, JavaScript +83%, Go +56%, Rust +321\%), and in Python it reached 35.9% of Pass@1 one-time pass rate, surpassing the larger StarCoder-15B. * **More Useful Features**: Inheriting the ChatGLM2-6B model features, CodeGeeX2-6B better supports both Chinese and English prompts, maximum 8192 sequence length, and the inference speed is significantly improved compared to the first-generation. After quantization, it only needs 6GB of GPU memory for inference, thus supports lightweight local deployment. -* **Comprehensive AI Coding Assistant**: The backend of CodeGeeX plugin ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)) is upgraded, supporting 100+ programming languages, and adding practical functions such as infilling and cross-file completion. 
Combined with the "Ask CodeGeeX" interactive AI coding assistant, it can be used to solve various programming problems via Chinese or English dialogue, including but not limited to code summarization, code translation, debugging, and comment generation, which helps increasing the efficiency of developpers. +* **Comprehensive AI Coding Assistant**: The backend of CodeGeeX plugin ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)) is upgraded, supporting 100+ programming languages, and adding practical functions such as infilling and cross-file completion. Combined with the "Ask CodeGeeX" interactive AI coding assistant, it can be used to solve various programming problems via Chinese or English dialogue, including but not limited to code summarization, code translation, debugging, and comment generation, which helps increase the efficiency of developers. * **Open License**: CodeGeeX2-6B weights are fully open to academic research, and please apply for commercial use by filling in the [registration form](https://open.bigmodel.cn/mla/form?mcode=CodeGeeX2-6B). @@ -66,8 +66,8 @@ python ./demo/run_demo.py ``` ❗️Attention: -* CodeGeeX2 is a base model, which is not instruction-tuned for chatting. It can do tasks like code completion/translation/explaination. To try the instruction-tuned version in CodeGeeX plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)). -* Programming languages can be controled by adding `language tag`, e.g., `# language: Python`. The format should be respected to ensure performance, full list can be found [here](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14). Please write comments under the format of the selected programming language to achieve better results. 
+* CodeGeeX2 is a base model, which is not instruction-tuned for chatting. It can do tasks like code completion/translation/explanation. To try the instruction-tuned version, use the CodeGeeX plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)). +* Programming languages can be controlled by adding `language tag`, e.g., `# language: Python`. The format should be respected to ensure performance; the full list can be found [here](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14). Please write comments under the format of the selected programming language to achieve better results. * If the GPU doesn't support `bfloat16` format, it will cause incorrect output. Please convert the model to `float16` format: ```python model = AutoModel.from_pretrained("THUDM/codegeex2-6b", trust_remote_code=True).half().cuda() @@ -151,7 +151,7 @@ CodeGeeX2 is more friendly to deployment than the previous generation. Thanks to | :--------------: | :-------: | :-----: | :----: | | CodeGeeX-13B | 26\.9 GB | 14\.7 GB | - | | **CodeGeeX2-6B** | 13\.1 GB | 8\.2 GB | 5\.5 GB | -> Based on PyTorch 2.0, using `torch.nn.functional.scaled_dot_product_attention` for effecient attention mechanism。 +> Based on PyTorch 2.0, using `torch.nn.functional.scaled_dot_product_attention` for efficient attention mechanism. ### Acceleration diff --git a/demo/run_demo.py b/demo/run_demo.py index ea58ecb..202899b 100644 --- a/demo/run_demo.py +++ b/demo/run_demo.py @@ -219,7 +219,7 @@ def add_code_generation_args(parser): def set_random_seed(seed): - """Set random seed for reproducability.""" + """Set random seed for reproducibility.""" random.seed(seed) numpy.random.seed(seed) torch.manual_seed(seed) @@ -312,8 +312,8 @@ def predict( * 按照所选编程语言的格式写注释可以获得更好的结果,请参照下方给出的示例。 This is the DEMO for CodeGeeX2. Please note that: - * CodeGeeX2 is a base model, which is not instruction-tuned for chatting. 
It can do tasks like code completion/translation/explaination. To try the instruction-tuned version in CodeGeeX plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)). - * Programming languages can be controled by adding `language tag`, e.g., `# language: Python`. The format should be respected to ensure performance, full list can be found [here](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14). + * CodeGeeX2 is a base model, which is not instruction-tuned for chatting. It can do tasks like code completion/translation/explanation. To try the instruction-tuned version in CodeGeeX plugins ([VS Code](https://marketplace.visualstudio.com/items?itemName=aminer.codegeex), [Jetbrains](https://plugins.jetbrains.com/plugin/20587-codegeex)). + * Programming languages can be controlled by adding `language tag`, e.g., `# language: Python`. The format should be respected to ensure performance, full list can be found [here](https://github.com/THUDM/CodeGeeX2/blob/main/evaluation/utils.py#L14). * Write comments under the format of the selected programming language to achieve better results, see examples below. 
""") @@ -348,7 +348,7 @@ def predict( clr.click(fn=lambda value: gr.update(value=""), inputs=clr, outputs=prompt) gr_examples = gr.Examples(examples=examples, inputs=[prompt, lang], - label="Example Inputs (Click to insert an examplet it into the input box)", + label="Example Inputs (Click to insert an example into the input box)", examples_per_page=20) if not args.auth: demo.launch(server_name=args.listen, server_port=args.port) diff --git a/evaluation/utils.py b/evaluation/utils.py index 0147bc0..9434426 100644 --- a/evaluation/utils.py +++ b/evaluation/utils.py @@ -170,7 +170,7 @@ def set_random_seed(seed): - """Set random seed for reproducability.""" + """Set random seed for reproducibility.""" random.seed(seed) numpy.random.seed(seed) torch.manual_seed(seed)