run_inference.py
import json
import subprocess
# Llama-cli parameters for the three tested models
# DeepSeek model configuration
MODEL_PATH = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"
NGL = 30
MAX_TOKENS = 512
MODEL_NAME = "deepseek-coder-6.7b"
# # Qwen model configuration
# MODEL_PATH = "qwen2.5-coder-7b-instruct-q4_k_m.gguf"
# NGL = 40
# MAX_TOKENS = 512
# MODEL_NAME = "qwen2.5-coder-7b"
# # Stable code model configuration
# MODEL_PATH = "stable-code-instruct-3b.Q4_K_M.gguf"
# NGL = 40
# MAX_TOKENS = 512
# MODEL_NAME = "stable-code-3b"

def load_dataset(input_file, output_file, fewshot_file):
    """
    Load the test set: one natural-language description per line of input_file,
    the corresponding assembly snippet on the same line of output_file, and the
    few-shot examples as a single block of text from fewshot_file.
    """
    with open(input_file, 'r', encoding='utf-8') as infile, \
         open(output_file, 'r', encoding='utf-8') as outfile, \
         open(fewshot_file, 'r', encoding='utf-8') as fewshotfile:
        descriptions = infile.readlines()
        assembly_codes = outfile.readlines()
        fewshot_examples = fewshotfile.read()
    if len(descriptions) != len(assembly_codes):
        raise ValueError("Input and output files must have the same number of lines.")
    dataset = [(desc.strip(), code.strip()) for desc, code in zip(descriptions, assembly_codes)]
    return dataset, fewshot_examples

def build_instruction_prompt(fewshot: str, instruction: str):
    # The few-shot text is assumed to already contain the _BREAK-separated
    # example descriptions, so it is placed directly before the current one.
    return '''You are an expert in IA-32 assembly code generation. You have to generate an IA-32 assembly code snippet starting from its natural language description. I will provide a code description; generate just the corresponding assembly code. The _BREAK tokens separate previous code descriptions from the current code description. Just translate the last description.
### Instruction:
Description: {} {}
### Response:
Assembly Code:'''.format(fewshot, instruction)

def run_inference(prompt: str, model_path: str, ngl: int, max_tokens: int):
    """
    Run inference using llama-cli for a given prompt.
    """
    command = [
        "./llama.cpp/llama-cli",
        "-m", model_path,
        "-co",
        "-fa",
        "-ngl", str(ngl),
        "-n", str(max_tokens),
        "-p", prompt,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        return result.stdout.strip()
    except subprocess.CalledProcessError as e:
        print(f"Error running inference for prompt: {prompt}")
        print(f"Error: {e.stderr}")
        return "ERROR"

def perform_inference(inputs_file: str, outputs_file: str, fewshot_file: str, output_json: str):
    testset, fewshot_examples = load_dataset(inputs_file, outputs_file, fewshot_file)
    prompts = [build_instruction_prompt(fewshot_examples, testinput[0]) for testinput in testset]
    snippets = [testinput[1] for testinput in testset]
    predictions = []
    for prompt in prompts:
        pred = run_inference(prompt, MODEL_PATH, NGL, MAX_TOKENS)
        # print(pred)
        predictions.append(pred)
    results = [{"reference": snippet, "prediction": prediction} for snippet, prediction in zip(snippets, predictions)]
    with open(f"{MODEL_NAME}_{output_json}", "w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=4, ensure_ascii=False)
    return predictions, snippets
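
# The JSON written above is a list of {"reference": ..., "prediction": ...}
# entries: "reference" is the ground-truth snippet from outputs_file and
# "prediction" is the raw llama-cli stdout, which typically still echoes the
# prompt and so may need post-processing before scoring.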

def perform_experiment(inputs_file, outputs_file, fewshot_file, output_json):
    predictions, references = perform_inference(inputs_file, outputs_file, fewshot_file, output_json)
    # compute_scores(predictions, references, output_json)


# Modify with correct files
perform_experiment(
    inputs_file="",
    outputs_file="",
    fewshot_file="",
    output_json=""
)