run_inference.py
import json
import subprocess
# Llama-cli parameters for the three tested models
# DeepSeek model configuration
MODEL_PATH = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"
NGL = 30
MAX_TOKENS = 512
MODEL_NAME = "deepseek-coder-6.7b"
# # Qwen model configuration
# MODEL_PATH = "qwen2.5-coder-7b-instruct-q4_k_m.gguf"
# NGL = 40
# MAX_TOKENS = 512
# MODEL_NAME = "qwen2.5-coder-7b"
# # Stable code model configuration
# MODEL_PATH = "stable-code-instruct-3b.Q4_K_M.gguf"
# NGL = 40
# MAX_TOKENS = 512
# MODEL_NAME = "stable-code-3b"

def load_dataset(input_file, output_file, fewshot_file):
    """
    Load the test set: one natural-language description per line of input_file,
    the corresponding assembly snippet on the same line of output_file, and the
    few-shot examples as a single block of text from fewshot_file.
    """
    with open(input_file, 'r', encoding='utf-8') as infile, \
         open(output_file, 'r', encoding='utf-8') as outfile, \
         open(fewshot_file, 'r', encoding='utf-8') as fewshotfile:
        descriptions = infile.readlines()
        assembly_codes = outfile.readlines()
        fewshot_examples = fewshotfile.read()
    if len(descriptions) != len(assembly_codes):
        raise ValueError("Input and output files must have the same number of lines.")
    dataset = [(desc.strip(), code.strip()) for desc, code in zip(descriptions, assembly_codes)]
    return dataset, fewshot_examples

def build_instruction_prompt(fewshot: str, instruction: str):
    # The few-shot text is assumed to already contain the _BREAK-separated
    # example descriptions, so it is placed directly before the current one.
    return '''You are an expert in IA-32 assembly code generation. You have to generate an IA-32 assembly code snippet starting from its natural language description. I will provide a code description; generate just the corresponding assembly code. The _BREAK tokens separate previous code descriptions from the current code description. Just translate the last description.
### Instruction:
Description: {} {}
### Response:
Assembly Code:'''.format(fewshot, instruction)

def run_inference(prompt: str, model_path: str, ngl: int, max_tokens: int):
    """
    Run inference using llama-cli for a given prompt.
    """
    command = [
        "./llama.cpp/llama-cli",
        "-m", model_path,
        "-co",
        "-fa",
        "-ngl", str(ngl),
        "-n", str(max_tokens),
        "-p", prompt,
    ]
    try:
        result = subprocess.run(command, capture_output=True, text=True, check=True)
        return result.stdout.strip()
    except subprocess.CalledProcessError as e:
        print(f"Error running inference for prompt: {prompt}")
        print(f"Error: {e.stderr}")
        return "ERROR"

def perform_inference(inputs_file: str, outputs_file: str, fewshot_file: str, output_json: str):
    testset, fewshot_examples = load_dataset(inputs_file, outputs_file, fewshot_file)
    prompts = [build_instruction_prompt(fewshot_examples, testinput[0]) for testinput in testset]
    snippets = [testinput[1] for testinput in testset]
    predictions = []
    for prompt in prompts:
        pred = run_inference(prompt, MODEL_PATH, NGL, MAX_TOKENS)
        # print(pred)
        predictions.append(pred)
    results = [{"reference": snippet, "prediction": prediction} for snippet, prediction in zip(snippets, predictions)]
    with open(f"{MODEL_NAME}_{output_json}", "w", encoding="utf-8") as json_file:
        json.dump(results, json_file, indent=4, ensure_ascii=False)
    return predictions, snippets
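
# The JSON written above is a list of {"reference": ..., "prediction": ...}
# entries: "reference" is the ground-truth snippet from outputs_file and
# "prediction" is the raw llama-cli stdout, which typically still echoes the
# prompt and so may need post-processing before scoring.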

def perform_experiment(inputs_file, outputs_file, fewshot_file, output_json):
    predictions, references = perform_inference(inputs_file, outputs_file, fewshot_file, output_json)
    # compute_scores(predictions, references, output_json)


# Modify with correct files
perform_experiment(
    inputs_file="",
    outputs_file="",
    fewshot_file="",
    output_json=""
)