From 6131a3c316725173ca0d26dc2da80d9557e58173 Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Wed, 22 May 2024 17:19:35 +0800 Subject: [PATCH 1/7] add GenAI_HFLM class to support microservice. --- .../lm_eval/models/huggingface.py | 312 ++++++++++++++++++ 1 file changed, 312 insertions(+) diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py index 38f5d095..10554b23 100644 --- a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py +++ b/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py @@ -36,6 +36,11 @@ MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, ) +from lm_eval.api.registry import register_model +from lm_eval.api.model import CacheHook +import requests as requests_obj +from requests.exceptions import RequestException +import json eval_logger = utils.eval_logger @@ -1217,3 +1222,310 @@ def _model_call(self, inps): logits = logits[:, :-padding_length, :] logits = logits.to(torch.float32) return logits + + +@register_model("genai-hf") +class GenAI_HFLM(HFLM): + AUTO_MODEL_CLASS = transformers.AutoModelForCausalLM + + def __init__( + self, + base_url=None, + logits_cache: bool = True, + tokenizer: Optional[str] = None, + revision: Optional[str] = "main", + batch_size: int = 1, + max_length: Optional[int] = None, + trust_remote_code: Optional[bool] = False, + use_fast_tokenizer: Optional[bool] = True, + add_bos_token: Optional[bool] = False, + prefix_token_id: Optional[int] = None, + **kwargs): + self.base_url = base_url + assert self.base_url, "must pass `base_url` to use GenaAI service!" + self._rank = 0 + self._world_size = 1 + + self.tokenizer = transformers.AutoTokenizer.from_pretrained( + tokenizer, + revision=revision, + trust_remote_code=trust_remote_code, + use_fast=use_fast_tokenizer, + ) + + self.logits_cache = logits_cache + # select (or create) a pad token to use + if self.tokenizer.pad_token: + pass + elif self.tokenizer.unk_token: + self.tokenizer.pad_token_id = self.tokenizer.unk_token_id + elif self.tokenizer.eos_token: + self.tokenizer.pad_token_id = self.tokenizer.eos_token_id + else: + if getattr(self.config, "model_type", None) == "qwen": + # Qwen's trust_remote_code tokenizer does not allow for adding special tokens + self.tokenizer.pad_token = "<|endoftext|>" + elif ( + self.tokenizer.__class__.__name__ == "RWKVWorldTokenizer" + or self.tokenizer.__class__.__name__ == "Rwkv5Tokenizer" + ): + # The RWKV world tokenizer, does not allow for adding special tokens / setting the pad token (which is set as 0) + # The additional tokenizer name check is needed, as there exists rwkv4 models with neox tokenizer + # --- + # Note that the world tokenizer class name, might change in the future for the final huggingface merge + # https://github.com/huggingface/transformers/pull/26963 + assert self.tokenizer.pad_token_id == 0 + else: + self.tokenizer.add_special_tokens({"pad_token": "<|pad|>"}) + + # TODO: override this for Gemma + self.add_bos_token = add_bos_token + if "GemmaTokenizer" in self.tokenizer.__class__.__name__: + self.add_bos_token = True + eval_logger.info( + f"Model type is '{self.config.model_type}', a BOS token will be used as Gemma underperforms without it." 
+ ) + + self._batch_size = int(batch_size) + self._max_length = max_length + self.custom_prefix_token_id = prefix_token_id + if prefix_token_id is not None: + eval_logger.info( + f"Loglikelihood prefix token id used in evaluation: {self.prefix_token_id}" + ) + self.cache_hook = CacheHook(None) + self.headers = {"Content-Type": "application/json"} + + @property + def max_length(self) -> int: + if self._max_length: + return self._max_length + else: + return self._DEFAULT_MAX_LENGTH + + @property + def batch_size(self) -> int: + return self._batch_size + + def _loglikelihood_tokens( + self, + task_requests: List[Tuple[Tuple[str, str], List[int], List[int]]], + disable_tqdm: bool = False, + override_bs: int = None, + ) -> List[Tuple[float, bool]]: + # TODO: implement some kind of efficient-request-middleware that lumps together requests with the same context + res = [] + + def _collate(req: Tuple[Tuple[str, str], List[int], List[int]]): + """Defines the key for the sorted method""" + # the negative sign on len(toks) sorts descending - this has a few advantages: + # - time estimates will always be over not underestimates, which is more useful for planning + # - to know the size of a batch when going through the list, you know the first one is always the batch + # padded context length. this is useful to simplify the batching logic and more importantly to make + # automatic adaptive batches much much easier to implement + # - any OOMs will happen right away rather than near the end + + toks = req[1] + req[2] + return -len(toks), tuple(toks) + + def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): + """Defines the key to group and lookup one-token continuations""" + # Use with group_by="contexts" (optional)" + # allows for the creation of a lookup, so we can reuse logits in case of one-token continuations. + # speeds up some multiple-choice tasks proportionally to the number of choices. + # groups requests by context+continuation[:-1] and infer on one request/group. 
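+            # note: req[-2] is context_enc and req[-1] is continuation_enc, so for a
+            # one-token continuation the key collapses to the context tokens alone,
+            # letting every answer choice that shares a context reuse one forward pass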
+ return req[-2] + req[-1][:-1] + + re_ord = Collator( + task_requests, + sort_fn=_collate, + group_by=None, + group_fn=_lookup_one_token_cont, + ) + + # automatic (variable) batch size detection for vectorization + # pull longest context sample from request + n_reordered_requests = len(re_ord) + batch_size = ( + self.batch_size + if self.batch_size != "auto" + else override_bs + if override_bs is not None + else 0 + ) + batch_fn = ( + self._batch_scheduler + if self.batch_size == "auto" + and n_reordered_requests > 0 + and not override_bs + else None + ) + + chunks = re_ord.get_batched(n=batch_size, batch_fn=batch_fn) + pbar = tqdm( + total=len(task_requests), + disable=(disable_tqdm or (self.rank != 0)), + desc="Running loglikelihood requests", + ) + for chunk in chunks: + inps = [] + cont_toks_list = [] + inplens = [] + + conts = [] + encoder_attns = [] + + padding_len_inp = None + padding_len_cont = None + # because vectorizing is annoying, we first convert each (context, continuation) pair to padded + # tensors, then we pack them together into a batch, call the model, and then pick it all apart + # again because vectorizing is annoying + + for _, context_enc, continuation_enc in chunk: + # sanity check + assert len(context_enc) > 0 + assert len(continuation_enc) > 0 + assert len(continuation_enc) <= self.max_length + + # how this all works (illustrated on a causal decoder-only setup): + # CTX CONT + # inp 0 1 2 3|4 5 6 7 8 9 <- last token is deleted by inp[:, :-1] + # model \ \ + # logits 1 2 3|4 5 6 7 8 9 <- the ctx half gets tossed out by the + # cont_toks 4 5 6 7 8 9 [:, -len(continuation_enc):, :self.vocab_size] slice + + # when too long to fit in context, truncate from the left + if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM: + inp = torch.tensor( + (context_enc + continuation_enc)[-(self.max_length + 1) :], + dtype=torch.long, + ) + (inplen,) = inp.shape + elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM: + inp = torch.tensor( + (context_enc)[-self.max_length :], + dtype=torch.long, + ) + (inplen,) = inp.shape + + # build encoder attn masks + encoder_attns.append(torch.ones_like(inp)) + + cont = torch.tensor( + (continuation_enc)[-self.max_length :], + # TODO: left-shift these? + # TODO: our code assumes we never end up truncating conts for either model type + dtype=torch.long, + ) + (contlen,) = cont.shape + + conts.append(cont) + + padding_len_cont = ( + max(padding_len_cont, contlen) + if padding_len_cont is not None + else contlen + ) + + padding_len_inp = ( + max(padding_len_inp, inplen) + if padding_len_inp is not None + else inplen + ) + + inps.append(inp) # [1, inp_length] + cont_toks_list.append(continuation_enc) + inplens.append(inplen) + + # create encoder attn mask and batched conts, if seq2seq + call_kwargs = {} + if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM: + batched_inps = pad_and_concat( + padding_len_inp, inps, padding_side="right" + ) # [batch, padding_len_inp] + elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM: + # TODO: left-pad encoder inps and mask? 
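+                # note: GenAI_HFLM pins AUTO_MODEL_CLASS to AutoModelForCausalLM, so this
+                # seq2seq branch is kept only for parity with HFLM and is not expected to run here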
+ batched_inps = pad_and_concat( + padding_len_inp, inps + ) # [batch, padding_len_inp] + batched_conts = pad_and_concat( + padding_len_cont, conts + ) # [batch, padding_len_cont] + batched_encoder_mask = pad_and_concat( + padding_len_inp, encoder_attns + ) # [batch, padding_len_inp] + call_kwargs = { + "attn_mask": batched_encoder_mask, + "labels": batched_conts, + } + + data = { + "batched_inputs": batched_inps.tolist(), + } + try: + response = requests_obj.post( + f"{self.base_url}/v1/completions", + headers=self.headers, + data=json.dumps(data), + ) + response.raise_for_status() + response = response.json() + except RequestException as e: + logger.error(f"RequestException: {e}") + + for (request_str, ctx_tokens, _), greedy_tokens, logprobs, inplen, cont_toks in zip( + chunk, response["greedy_tokens"], response["logprobs"],inplens, cont_toks_list + ): + # Slice to original seq length + contlen = len(cont_toks) + # take only logits in the continuation + # (discard context toks if decoder-only ; discard right-padding) + # also discards + checks for "virtual tokens" in the causal LM's input window + # from prompt/prefix tuning tokens, if applicable + ctx_len = ( + inplen + (len(logprobs) - padding_len_inp) + if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM + else None + ) + cont_toks = torch.tensor( + cont_toks, dtype=torch.long + ).unsqueeze(0) # [1, seq] + greedy_tokens = torch.tensor( + self._select_cont_toks(greedy_tokens, contlen=contlen, inplen=ctx_len), + dtype=torch.long + ).unsqueeze(0) # [1, seq] + max_equal = (greedy_tokens == cont_toks).all() + cont_logprobs = self._select_cont_toks(logprobs, contlen=contlen, inplen=ctx_len) + + # Answer: (log prob, is-exact-match) + answer = (sum(cont_logprobs), bool(max_equal)) + + res.append(answer) + + self.cache_hook.add_partial("loglikelihood", request_str, answer) + pbar.update(1) + + pbar.close() + + return re_ord.get_original(res) + + def _model_call(self, inps): + # Isn't used because we override _loglikelihood_tokens + raise NotImplementedError() + + def _model_generate(self, context, max_length, eos_token_id): + # Isn't used because we override generate_until + raise NotImplementedError() + + @property + def device(self): + # Isn't used because we override _loglikelihood_tokens + raise NotImplementedError() + + def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): + raise NotImplementedError( + "loglikelihood_rolling not yet supported for GenAI service" + ) + + def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]: + raise NotImplementedError("Not supported yet.") From d86fa599adf8741373a7a3ba52f46d4e44be13db Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 22 May 2024 09:21:29 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../lm_eval/models/huggingface.py | 103 +++++++----------- 1 file changed, 37 insertions(+), 66 deletions(-) diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py index 10554b23..30b7aebe 100644 --- a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py +++ b/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py @@ -15,32 +15,31 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import copy +import json import os from datetime import timedelta from pathlib import Path from typing import List, Literal, Optional, Tuple, Union +import requests as requests_obj import torch import torch.nn.functional as F import transformers from accelerate import Accelerator, DistributedType, InitProcessGroupKwargs, find_executable_batch_size from lm_eval import utils from lm_eval.api.instance import Instance -from lm_eval.api.model import TemplateLM +from lm_eval.api.model import CacheHook, TemplateLM +from lm_eval.api.registry import register_model from lm_eval.models.utils import Collator, clear_torch_cache, get_dtype, pad_and_concat, stop_sequences_criteria from packaging import version from peft import PeftModel from peft import __version__ as PEFT_VERSION +from requests.exceptions import RequestException from tqdm import tqdm from transformers.models.auto.modeling_auto import ( MODEL_FOR_CAUSAL_LM_MAPPING_NAMES, MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING_NAMES, ) -from lm_eval.api.registry import register_model -from lm_eval.api.model import CacheHook -import requests as requests_obj -from requests.exceptions import RequestException -import json eval_logger = utils.eval_logger @@ -1240,18 +1239,19 @@ def __init__( use_fast_tokenizer: Optional[bool] = True, add_bos_token: Optional[bool] = False, prefix_token_id: Optional[int] = None, - **kwargs): + **kwargs, + ): self.base_url = base_url assert self.base_url, "must pass `base_url` to use GenaAI service!" self._rank = 0 self._world_size = 1 self.tokenizer = transformers.AutoTokenizer.from_pretrained( - tokenizer, - revision=revision, - trust_remote_code=trust_remote_code, - use_fast=use_fast_tokenizer, - ) + tokenizer, + revision=revision, + trust_remote_code=trust_remote_code, + use_fast=use_fast_tokenizer, + ) self.logits_cache = logits_cache # select (or create) a pad token to use @@ -1280,7 +1280,7 @@ def __init__( # TODO: override this for Gemma self.add_bos_token = add_bos_token - if "GemmaTokenizer" in self.tokenizer.__class__.__name__: + if "GemmaTokenizer" in self.tokenizer.__class__.__name__: self.add_bos_token = True eval_logger.info( f"Model type is '{self.config.model_type}', a BOS token will be used as Gemma underperforms without it." 
@@ -1290,9 +1290,7 @@ def __init__( self._max_length = max_length self.custom_prefix_token_id = prefix_token_id if prefix_token_id is not None: - eval_logger.info( - f"Loglikelihood prefix token id used in evaluation: {self.prefix_token_id}" - ) + eval_logger.info(f"Loglikelihood prefix token id used in evaluation: {self.prefix_token_id}") self.cache_hook = CacheHook(None) self.headers = {"Content-Type": "application/json"} @@ -1317,7 +1315,7 @@ def _loglikelihood_tokens( res = [] def _collate(req: Tuple[Tuple[str, str], List[int], List[int]]): - """Defines the key for the sorted method""" + """Defines the key for the sorted method.""" # the negative sign on len(toks) sorts descending - this has a few advantages: # - time estimates will always be over not underestimates, which is more useful for planning # - to know the size of a batch when going through the list, you know the first one is always the batch @@ -1329,7 +1327,7 @@ def _collate(req: Tuple[Tuple[str, str], List[int], List[int]]): return -len(toks), tuple(toks) def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): - """Defines the key to group and lookup one-token continuations""" + """Defines the key to group and lookup one-token continuations.""" # Use with group_by="contexts" (optional)" # allows for the creation of a lookup, so we can reuse logits in case of one-token continuations. # speeds up some multiple-choice tasks proportionally to the number of choices. @@ -1346,18 +1344,10 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): # automatic (variable) batch size detection for vectorization # pull longest context sample from request n_reordered_requests = len(re_ord) - batch_size = ( - self.batch_size - if self.batch_size != "auto" - else override_bs - if override_bs is not None - else 0 - ) + batch_size = self.batch_size if self.batch_size != "auto" else override_bs if override_bs is not None else 0 batch_fn = ( self._batch_scheduler - if self.batch_size == "auto" - and n_reordered_requests > 0 - and not override_bs + if self.batch_size == "auto" and n_reordered_requests > 0 and not override_bs else None ) @@ -1421,17 +1411,9 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): conts.append(cont) - padding_len_cont = ( - max(padding_len_cont, contlen) - if padding_len_cont is not None - else contlen - ) + padding_len_cont = max(padding_len_cont, contlen) if padding_len_cont is not None else contlen - padding_len_inp = ( - max(padding_len_inp, inplen) - if padding_len_inp is not None - else inplen - ) + padding_len_inp = max(padding_len_inp, inplen) if padding_len_inp is not None else inplen inps.append(inp) # [1, inp_length] cont_toks_list.append(continuation_enc) @@ -1440,41 +1422,33 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): # create encoder attn mask and batched conts, if seq2seq call_kwargs = {} if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM: - batched_inps = pad_and_concat( - padding_len_inp, inps, padding_side="right" - ) # [batch, padding_len_inp] + batched_inps = pad_and_concat(padding_len_inp, inps, padding_side="right") # [batch, padding_len_inp] elif self.AUTO_MODEL_CLASS == transformers.AutoModelForSeq2SeqLM: # TODO: left-pad encoder inps and mask? 
- batched_inps = pad_and_concat( - padding_len_inp, inps - ) # [batch, padding_len_inp] - batched_conts = pad_and_concat( - padding_len_cont, conts - ) # [batch, padding_len_cont] - batched_encoder_mask = pad_and_concat( - padding_len_inp, encoder_attns - ) # [batch, padding_len_inp] + batched_inps = pad_and_concat(padding_len_inp, inps) # [batch, padding_len_inp] + batched_conts = pad_and_concat(padding_len_cont, conts) # [batch, padding_len_cont] + batched_encoder_mask = pad_and_concat(padding_len_inp, encoder_attns) # [batch, padding_len_inp] call_kwargs = { "attn_mask": batched_encoder_mask, "labels": batched_conts, } data = { - "batched_inputs": batched_inps.tolist(), - } + "batched_inputs": batched_inps.tolist(), + } try: response = requests_obj.post( - f"{self.base_url}/v1/completions", - headers=self.headers, - data=json.dumps(data), - ) + f"{self.base_url}/v1/completions", + headers=self.headers, + data=json.dumps(data), + ) response.raise_for_status() response = response.json() except RequestException as e: logger.error(f"RequestException: {e}") for (request_str, ctx_tokens, _), greedy_tokens, logprobs, inplen, cont_toks in zip( - chunk, response["greedy_tokens"], response["logprobs"],inplens, cont_toks_list + chunk, response["greedy_tokens"], response["logprobs"], inplens, cont_toks_list ): # Slice to original seq length contlen = len(cont_toks) @@ -1487,13 +1461,12 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): if self.AUTO_MODEL_CLASS == transformers.AutoModelForCausalLM else None ) - cont_toks = torch.tensor( - cont_toks, dtype=torch.long - ).unsqueeze(0) # [1, seq] + cont_toks = torch.tensor(cont_toks, dtype=torch.long).unsqueeze(0) # [1, seq] greedy_tokens = torch.tensor( - self._select_cont_toks(greedy_tokens, contlen=contlen, inplen=ctx_len), - dtype=torch.long - ).unsqueeze(0) # [1, seq] + self._select_cont_toks(greedy_tokens, contlen=contlen, inplen=ctx_len), dtype=torch.long + ).unsqueeze( + 0 + ) # [1, seq] max_equal = (greedy_tokens == cont_toks).all() cont_logprobs = self._select_cont_toks(logprobs, contlen=contlen, inplen=ctx_len) @@ -1523,9 +1496,7 @@ def device(self): raise NotImplementedError() def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): - raise NotImplementedError( - "loglikelihood_rolling not yet supported for GenAI service" - ) + raise NotImplementedError("loglikelihood_rolling not yet supported for GenAI service") def generate_until(self, requests, disable_tqdm: bool = False) -> List[str]: raise NotImplementedError("Not supported yet.") From 2278bdd28f06477784e01c9bcc35a8580cffb1a1 Mon Sep 17 00:00:00 2001 From: "Lv, Kaokao" Date: Wed, 22 May 2024 18:39:14 +0800 Subject: [PATCH 3/7] fix typo. 
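
The "typo" is an undefined name: the exception handler added in PATCH 1 calls
`logger.error(f"RequestException: {e}")`, but this module only defines
`eval_logger` (utils.eval_logger), so a failed request to the GenAI service
would raise a NameError instead of logging the RequestException.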
--- .../lm_evaluation_harness/lm_eval/models/huggingface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py b/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py index 10554b23..2da11d89 100644 --- a/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py +++ b/GenAIEval/evaluation/lm_evaluation_harness/lm_eval/models/huggingface.py @@ -1471,7 +1471,7 @@ def _lookup_one_token_cont(req: Tuple[Tuple[str, str], List[int], List[int]]): response.raise_for_status() response = response.json() except RequestException as e: - logger.error(f"RequestException: {e}") + eval_logger.error(f"RequestException: {e}") for (request_str, ctx_tokens, _), greedy_tokens, logprobs, inplen, cont_toks in zip( chunk, response["greedy_tokens"], response["logprobs"],inplens, cont_toks_list From d997e45dd64842cbc777732287942c8f22bc18ec Mon Sep 17 00:00:00 2001 From: VincyZhang Date: Wed, 22 May 2024 19:53:16 +0800 Subject: [PATCH 4/7] Update model_test_cpu.yml --- .github/workflows/model_test_cpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/model_test_cpu.yml b/.github/workflows/model_test_cpu.yml index a687d7ff..ed70411d 100644 --- a/.github/workflows/model_test_cpu.yml +++ b/.github/workflows/model_test_cpu.yml @@ -131,7 +131,7 @@ jobs: id: download-artifact uses: dawidd6/action-download-artifact@v3.1.2 with: - workflow: model-test.yml + workflow: model_test_cpu.yml name: FinalReport run_id: ${{ vars.ModelTest_CPU_REF_ID }} path: ${{ env.OUT_SCRIPT_PATH }} From a7aa198997b91f2cc779517b07d2c618a97edb4f Mon Sep 17 00:00:00 2001 From: VincyZhang Date: Wed, 22 May 2024 19:54:04 +0800 Subject: [PATCH 5/7] Update model_test_hpu.yml --- .github/workflows/model_test_hpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/model_test_hpu.yml b/.github/workflows/model_test_hpu.yml index 82a10f79..1e6f2316 100644 --- a/.github/workflows/model_test_hpu.yml +++ b/.github/workflows/model_test_hpu.yml @@ -119,7 +119,7 @@ jobs: id: download-artifact uses: dawidd6/action-download-artifact@v3.1.2 with: - workflow: model-test.yml + workflow: model_test_hpu.yml name: FinalReport run_id: ${{ vars.ModelTest_HPU_REF_ID }} path: ${{ env.OUT_SCRIPT_PATH }} From fcb224a76130d9ffc0c6767c0c60499ebaaf2372 Mon Sep 17 00:00:00 2001 From: VincyZhang Date: Wed, 22 May 2024 20:16:41 +0800 Subject: [PATCH 6/7] Update unittest.yml --- .github/workflows/unittest.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 261b775f..a8403784 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -62,7 +62,7 @@ jobs: - name: Docker Build run: | - docker build -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . + docker build --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f ${{ github.workspace }}/.github/workflows/docker/common.dockerfile -t ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} . 
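+          # assumption: env.HTTP_PROXY_IMAGE_BUILD / env.HTTPS_PROXY_IMAGE_BUILD are supplied by
+          # the self-hosted runner environment (e.g. the ~/actions-runner4/.env loaded in PATCH 7/7);
+          # passing them as build args lets the image build reach the network behind a corporate proxy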
- name: Docker Run run: | @@ -71,6 +71,7 @@ jobs: docker rm -vf ${{ env.CONTAINER_NAME }} || true fi docker run -dit --memory="4g" --memory-reservation="1g" --disable-content-trust --privileged --name=${{ env.CONTAINER_NAME }} --shm-size="1g" \ + -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" \ -v ${{ github.workspace }}:/GenAIEval ${{ env.DOCKER_NAME }}:${{ env.DOCKER_TAG }} - name: Install Dependencies From df124ccf2e26a77d4b1b725ec1de079f4ed114c9 Mon Sep 17 00:00:00 2001 From: VincyZhang Date: Wed, 22 May 2024 20:22:59 +0800 Subject: [PATCH 7/7] Update unittest.yml --- .github/workflows/unittest.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index a8403784..af5770c9 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -52,7 +52,9 @@ jobs: steps: - name: Clean Up Working Directory run: sudo rm -rf ${{github.workspace}}/* - + - name: Load environment variables + run: + cat ~/actions-runner4/.env >> $GITHUB_ENV - name: Checkout out Repo uses: actions/checkout@v4 with: