diff --git a/.env.example b/.env.example index 3e9186e..7df2318 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,9 @@ FRAC_GREEDY=1.0 # number of input requests to generate (virtual users will sample from these) SAMPLE_SIZE=1 +# use code prompts (rather than normal text) +CODE=false + # requests file REQUESTS_FILENAME=sample_requests.json diff --git a/fmperf/WorkloadSpecs.py b/fmperf/WorkloadSpecs.py index 690469e..53cefab 100644 --- a/fmperf/WorkloadSpecs.py +++ b/fmperf/WorkloadSpecs.py @@ -8,11 +8,13 @@ def __init__( image: str = "quay.io/fmperf/fmperf:main", pvc_name: str = None, overwrite: bool = False, + code: bool = False, ): self.sample_size = sample_size self.image = image self.pvc_name = pvc_name self.overwrite = overwrite + self.code = code @classmethod def from_yaml(cls, file: str): @@ -44,6 +46,10 @@ def get_env( "name": "OVERWRITE", "value": str(self.overwrite), }, + { + "name": "CODE", + "value": str(self.code), + }, {"name": "REQUESTS_FILENAME", "value": outfile}, ] return env @@ -58,12 +64,13 @@ def __init__( image: str = "fmperf-project/fmperf:local", pvc_name: str = None, overwrite: bool = False, + code: bool = False, ): self.input_tokens = input_tokens self.output_tokens = output_tokens self.greedy = greedy - super().__init__(1, image, pvc_name, overwrite) + super().__init__(1, image, pvc_name, overwrite, code) @classmethod def from_yaml(cls, file: str): @@ -115,13 +122,14 @@ def __init__( image: str = "quay.io/fmperf/fmperf:main", pvc_name: str = None, overwrite: bool = False, + code: bool = False, ): self.min_input_tokens = min_input_tokens self.max_input_tokens = max_input_tokens self.min_output_tokens = min_output_tokens self.max_output_tokens = max_output_tokens self.frac_greedy = frac_greedy - super().__init__(sample_size, image, pvc_name, overwrite) + super().__init__(sample_size, image, pvc_name, overwrite, code) @classmethod def from_yaml(cls, file: str): @@ -168,8 +176,9 @@ def __init__( image: str = "quay.io/fmperf/fmperf:main", pvc_name: str = None, overwrite: bool = False, + code: bool = False, ): - super().__init__(sample_size, image, pvc_name, overwrite) + super().__init__(sample_size, image, pvc_name, overwrite, code) @classmethod def from_yaml(cls, file: str): diff --git a/fmperf/loadgen/generate-input.py b/fmperf/loadgen/generate-input.py index cb0fa8c..e79d06f 100644 --- a/fmperf/loadgen/generate-input.py +++ b/fmperf/loadgen/generate-input.py @@ -14,8 +14,17 @@ import traceback from transformers import AutoTokenizer -# read in seed text -seed_text_file = impresources.files(fmperf.data) / "ai.txt" +code = os.getenv("CODE", "false").lower() != "false" + +if code: + import fmperf + + tmp = fmperf.__file__.split("/")[:-1] + tmp.append("Cluster.py") + seed_text_file = "/".join(tmp) +else: + seed_text_file = impresources.files(fmperf.data) / "ai.txt" + with open(seed_text_file, "r") as f: text = f.read()