From b7d288a47b0690dabda3fddafd7658f7926e9823 Mon Sep 17 00:00:00 2001 From: Johannes Kiesel Date: Wed, 19 Jun 2024 15:24:28 +0200 Subject: [PATCH] prompted evaluator --- docs/config.yml | 5 +- docs/index.html | 1598 +++++++++-------- src/evaluators/prompted-evaluator.js | 53 + src/evaluators/readability-evaluator.js | 2 +- src/index.js | 8 +- src/llm.js | 45 +- src/systems/generative-elastic-system.js | 3 +- src/templates.js | 2 +- .../discussion-configuration.json | 106 ++ static/css/main.css | 2 + static/css/messages.css | 3 + static/js/configuration.js | 2 +- static/js/simulation.js | 29 +- 13 files changed, 1114 insertions(+), 744 deletions(-) create mode 100644 src/evaluators/prompted-evaluator.js create mode 100644 static/configurations/discussion-configuration.json diff --git a/docs/config.yml b/docs/config.yml index 40c7ca9..4f914be 100644 --- a/docs/config.yml +++ b/docs/config.yml @@ -7,6 +7,7 @@ toc: - evaluate - name: Evaluators - Evaluator + - PromptedEvaluator - ReadabilityEvaluator - name: Systems - System @@ -15,14 +16,14 @@ toc: - User - StaticUser - name: Utility - - LLM - - Logbook - templates - name: Types - Evaluation - EvaluationResult - EVALUATION_RESULT + - LLM - LLMConfiguration + - Logbook - LogbookEntry - Simulation - SystemResponse diff --git a/docs/index.html b/docs/index.html index ce61239..355c3ba 100644 --- a/docs/index.html +++ b/docs/index.html @@ -106,6 +106,16 @@

@webis-de/gen-ir-sim

+
  • + PromptedEvaluator + + + +
  • + +
  • @@ -246,86 +256,6 @@

    @webis-de/gen-ir-sim

  • -
  • - LLM - - - - - -
  • - - -
  • - Logbook - - - - - -
  • - -
  • @@ -431,6 +361,58 @@

    @webis-de/gen-ir-sim

    + + +
  • + + +
  • + LLM + + + +
  • @@ -446,6 +428,34 @@

    @webis-de/gen-ir-sim

    +
  • + Logbook + + + + + +
  • + +
  • @@ -663,7 +673,7 @@

    -
    + src/index.js @@ -897,7 +907,7 @@

    - + src/index.js @@ -1119,7 +1129,7 @@

    - + src/index.js @@ -1300,7 +1310,7 @@

    - + src/evaluator.js @@ -1378,7 +1388,7 @@

    - + src/evaluator.js @@ -1492,21 +1502,21 @@

    -

    An evaluator that measures the readability of the system response.

    +

    An evaluator that prompts a language model for a score.

    -
    new ReadabilityEvaluator(configuration: Object, log: Logbook)
    +
    new PromptedEvaluator(configuration: Object, log: Logbook)
    @@ -1542,10 +1552,40 @@

    - configuration.measure string + configuration.llm LLMConfiguration - The key of the measure that should be -used to calculate the score + The configuration for the +language model to be prompted + + + + + + + configuration.prompt string + + Template for the prompt to evaluate +the system response. Variables: +
      +
    • {{x}}: A property x of the configuration for the evaluator
    • +
    • {{variables.simulation}}: The entire Simulation
    • +
    • {{variables.userTurn}}: The specific user turn, especially with +variables.userTurn.utterance and +variables.userTurn.systemResponse.utterance
    • +
    +
    + + + + + + configuration.requiredKeys Array? + + The properties +that the language model's response must have (in addition to + +EVALUATION_RESULT.SCORE +) @@ -1590,49 +1630,26 @@

    -
    - -

    - Systems -

    - - - - -
    - - -
    -

    A generative information retrieval system.

    -

    Systems can be stateful. However, users are not differentiated: the system -can assume it is used by exactly one user. A separate system object must be -instantiated for each simulated user.

    -

    The constructor of a system must have two parameters:

    -
      -
    • The configuration that has to be passed via super(configuration) and -is then available via this.configuration.
    • -
    • A Logbook that can be used to log the -initialization process.
    • -
    +

    An evaluator that measures the readability of the system response.

    -
    new System(configuration: Object)
    +
    new ReadabilityEvaluator(configuration: Object, log: Logbook)
    @@ -1650,14 +1667,140 @@

    configuration (Object) - The configuration for the system + The configuration for the evaluator
    -
    - -

    - + + + + + + + + + + + + + + + + + + + + + +
    NameDescription
    configuration.measure string + The key of the measure that should be +used to calculate the score +
    + + + +
    +
    + log (Logbook) + A function that takes log messages + +
    + +
    + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +

    + Systems +

    + + + + +
    + + + +
    + + +
    + +

    + System +

    + + + + src/system.js + + +
    + + +

    A generative information retrieval system.

    +

    Systems can be stateful. However, users are not differentiated: the system +can assume it is used by exactly one user. A separate system object must be +instantiated for each simulated user.

    +

    The constructor of a system must have two parameters:

    +
      +
    • The configuration that has to be passed via super(configuration) and +is then available via this.configuration.
    • +
    • A Logbook that can be used to log the +initialization process.
    • +
    + +
    new System(configuration: Object)
    + + + + + + + + + + + +
    Parameters
    +
    + +
    +
    + configuration (Object) + The configuration for the system + +
    + +
    + +
    + @@ -1689,7 +1832,7 @@

    - + src/system.js @@ -1796,7 +1939,7 @@

    - + src/systems/generative-elastic-system.js @@ -1988,8 +2131,9 @@

    configuration.generation.requiredKeys Array? The properties -that the generation response must have (at least -utterance +that the generated response must have (in addition to + +SYSTEM_RESPONSE.UTTERANCE ) @@ -2046,7 +2190,7 @@

    - + src/systems/generative-elastic-system.js @@ -2164,7 +2308,7 @@

    - + src/user.js @@ -2242,7 +2386,7 @@

    - + src/user.js @@ -2351,7 +2495,7 @@

    - + src/user.js @@ -2466,7 +2610,7 @@

    - + src/users/static-user.js @@ -2608,21 +2752,21 @@

    -

    A large language model.

    +

    Static methods for filling in text templates.

    -
    new LLM(configuration: LLMConfiguration, logbook: Logbook)
    +
    templates
    @@ -2634,31 +2778,6 @@

    -
    Parameters
    -
    - -
    -
    - configuration (LLMConfiguration) - The configuration object - -
    - -
    - -
    -
    - logbook (Logbook) - The logbook to log to - -
    - -
    - -
    - - - @@ -2671,14 +2790,14 @@

    -
    Instance Members
    +
    Static Members
    -
    +
    - createAssistantMessage(message) + render(text, context, options = undefined)
    - +
    +
    + context (Object) + The values of the variables that can be referenced + +
    + +
    + +
    +
    + options (Object? + = undefined) + Replacement options + +
    + + + + + + + + + + + + + + + + + + + + + +
    NameDescription
    options.ignoreMissing boolean? + Whether to ignore if a reference +variable does not exist in the context (not changing the text) instead of +throwing an error +
    + +
    + +
    - -
    Returns
    - Object: - The message object - - @@ -2754,11 +2917,11 @@

    -
    +
    - createSystemMessage(message) + joinMessages(messages)
    -
    +
    - createUserMessage(message) + joinProperties(object, keys = undefined)
    - +
    +
    + keys (Array? + = undefined) + The names of the properties to convert, if not all +(the default) + +
    + +
    + +
    +
    Returns
    - Object: - The message object + string: + The converted object @@ -2914,11 +3088,11 @@

    -
    +
    - chat(messages, action) + tsv2Contexts(tsv)
    -
    -
    -
    - - json(messages, action, requiredKeys = [], maxRetries = 3) -
    -
    - + + + + + + + + +

    + + + + +
    + +

    + Types +

    + + + + +
    + + + +
    -

    Generates a chat completion in JSON format.

    +

    Object that represents the evaluation of a simulation.

    -
    json(messages: Array, action: string, requiredKeys: Array?, maxRetries: number?): Object
    +
    Evaluation
    + +

    + Type: + Object +

    @@ -3047,56 +3235,45 @@

    -
    Parameters
    -
    + + +
    Properties
    +
    -
    - messages (Array) - The message history for the completion, use - -LLM#createSystemMessage -, -LLM#createUserMessage -, and - -LLM#createAssistantMessage - to create these + configuration (Object) + : The configuration of the evaluation -
    -
    - action (string) - Name of the action for which the text is -generated, used for logging + simulation (Simulation) + : The simulation that was evaluated -
    -
    - requiredKeys (Array? - = []) - Names of properties that the parsed JSON -completion must have + userTurnsEvaluations (Array) + : For each user turn of the +simulation, in order, an object where the keys are the names of the +configured evaluators (if they evaluated the specific turn of the simulation) +and the values are the respective +EvaluationResult +s -
    -
    - maxRetries (number? - = 3) - Maximum number of times to retry the -completion (if it can not be parsed and is missing a required key) before -throwing an error + overallEvaluations (Object) + : An object where the keys are the +names of the configured evaluators (if they evaluated the overall simulation) +and the values are the respective +EvaluationResult +s -
    @@ -3106,20 +3283,6 @@

    - -
    Returns
    - Object: - The completion as parsed object - - - - - - - - - - @@ -3128,14 +3291,6 @@

    -

    - - - - - - - @@ -3150,21 +3305,26 @@

    -

    - Logbook +

    + EvaluationResult

    - - src/logbook.js + + src/evaluator.js
    -

    A logbook to log actions specific to one source.

    +

    Object returned by Evaluator#evaluate with at least a score.

    -
    new Logbook(source: string, callback: function?, prefix: string?)
    +
    EvaluationResult
    + +

    + Type: + Object +

    @@ -3176,37 +3336,16 @@

    -
    Parameters
    -
    - -
    -
    - source (string) - The source for which to log entries - -
    - -
    - -
    -
    - callback (function?) - An optional function to call with each - -LogbookEntry - created on -Logbook#log -
    - -
    + +
    Properties
    +
    -
    - prefix (string?) - An optional prefix to the action logged + score (number) + : A number between 0 and 1, with higher values +indicating better responses -
    @@ -3226,33 +3365,33 @@

    -
    Instance Members
    -
    + -
    -
    -
    - - log(action, object?) -
    -
    - +
    + +
    +
    +
    + + EXPLANATION +
    +
    + - + +
    + + + + + + + + + + + + + +
    -

    Replaces occurrences of {{path.to.variable}} in the text with the -corresponding values in the context object (e.g., replace with -context["path"]["to"]["variable"]).

    -

    If the input is not a string but an object or array, it is recursively cloned -and occurences in the contents are replaced. Numbers, boolean, etc. are -shallow copied.

    +

    A large language model.

    -
    render(text: any, context: Object, options: Object?)
    +
    new LLM(configuration: LLMConfiguration, logbook: Logbook)
    @@ -3422,18 +3584,8 @@

    - text (any) - The template string or an object or array structure that -contains template strings (among others) - -
    - -
    - -
    -
    - context (Object) - The values of the variables that can be referenced + configuration (LLMConfiguration) + The configuration object
    @@ -3441,39 +3593,11 @@

    - options (Object? - = undefined) - Replacement options + logbook (Logbook) + The logbook to log to
    - - - - - - - - - - - - - - - - - - - - - -
    NameDescription
    options.ignoreMissing boolean? - Whether to ignore if a reference -variable does not exist in the context (not changing the text) instead of -throwing an error -
    -

    @@ -3492,20 +3616,14 @@

    - - - - -

    - -
    -

    +
    Instance Members
    +
    -
    +
    - joinMessages(messages) + createAssistantMessage(message)
    -
    +
    - joinProperties(object, keys = undefined) + createSystemMessage(message)
    -
    +
    - tsv2Contexts(tsv) + createUserMessage(message)
    -
    +
    +
    +
    + + chat(messages, action) +
    +
    + +
    + +
    +
    +
    + + json(messages, action, requiredKeys = [], maxRetries = 3) +
    +
    + +
    + +
    + + + + + + @@ -3889,21 +4095,26 @@

    -

    - EvaluationResult +

    + LLMConfiguration

    - - src/evaluator.js + + src/llm.js
    -

    Object returned by Evaluator#evaluate with at least a score.

    +

    Configuration for an LLM.

    +

Properties are url (see below) and all parameters for the chat completion +endpoint, which includes the required model, but also optional parameters +like options.temperature (see the +modelfile parameter +of Ollama).

    -
    EvaluationResult
    +
    LLMConfiguration

    Type: @@ -3926,9 +4137,15 @@

    - score (number) - : A number between 0 and 1, with higher values -indicating better responses + url (string) + : The complete URL of the LLM's chat API endpoint + + +
    + +
    + model (string) + : The large language model name as per the API
    @@ -3961,21 +4178,21 @@

    -

    - EVALUATION_RESULT +

    + Logbook

    - - src/evaluator.js + + src/logbook.js
    -

    Constants for EvaluationResult property names.

    +

    A logbook to log actions specific to one source.

    -
    EVALUATION_RESULT
    +
    new Logbook(source: string, callback: function?, prefix: string?)
    @@ -3987,59 +4204,41 @@

    +
    Parameters
    +
    + +
    +
    + source (string) + The source for which to log entries - - - - - - - - - - - -
    Static Members
    -
    - -
    -
    -
    - - SCORE +
    +
    -
    - @@ -4055,16 +4254,14 @@

    - - -

    -
    +
    Instance Members
    +
    -
    +
    - EXPLANATION + log(action, object?)
    -
    - - - - - - - - - -
    - - -
    -

    - LLMConfiguration -

    - - - - src/llm.js - - -
    - - -

    Configuration for an LLM.

    -

    Properties are url (see below) and all paramters for the chat completion -endpoint, which includes the required model, but also optional parameters -like options.temperature (see the -modelfile parameter -of Ollama).

    - -
    LLMConfiguration
    - -

    - Type: - Object -

    - - +
    Returns
    + LogbookEntry: + The logged entry - - - - - + + -
    Properties
    -
    - -
    - url (string) - : The complete URL of the LLM's chat API endpoint - -
    - -
    - model (string) - : The large language model name as per the API - - -
    - -
    @@ -4202,8 +4342,12 @@

    +

    +
    +
    +
    @@ -4225,7 +4369,7 @@

    - + src/logbook.js @@ -4310,7 +4454,7 @@

    - + src/logbook.js @@ -4373,7 +4517,7 @@

    - + src/logbook.js @@ -4436,7 +4580,7 @@

    - + src/logbook.js @@ -4499,7 +4643,7 @@

    - + src/logbook.js @@ -4571,7 +4715,7 @@

    - + src/logbook.js @@ -4652,7 +4796,7 @@

    - + src/logbook.js @@ -4718,7 +4862,7 @@

    - + src/logbook.js @@ -4784,7 +4928,7 @@

    - + src/logbook.js @@ -4858,7 +5002,7 @@

    - + src/index.js @@ -4939,7 +5083,7 @@

    - + src/system.js @@ -5011,7 +5155,7 @@

    - + src/system.js @@ -5061,7 +5205,7 @@

    - + src/system.js @@ -5119,7 +5263,7 @@

    - + src/system.js @@ -5177,7 +5321,7 @@

    - + src/system.js @@ -5245,7 +5389,7 @@

    - + src/index.js @@ -5317,7 +5461,7 @@

    - + src/user.js @@ -5398,7 +5542,7 @@

    - + src/user.js @@ -5448,7 +5592,7 @@

    - + src/user.js @@ -5506,7 +5650,7 @@

    - + src/user.js diff --git a/src/evaluators/prompted-evaluator.js b/src/evaluators/prompted-evaluator.js new file mode 100644 index 0000000..a1216c7 --- /dev/null +++ b/src/evaluators/prompted-evaluator.js @@ -0,0 +1,53 @@ +import { Evaluator, EVALUATION_RESULT } from "../evaluator.js"; +import { LLM } from "../llm.js"; +import { Logbook } from "../logbook.js"; +import { render } from "../templates.js"; +import { SYSTEM_RESPONSE } from "../system.js"; +import { USER_TURN } from "../user.js"; + +/** + * An evaluator that prompts a language model for a score. + * + * @class PromptedEvaluator + * @param {Object} configuration - The configuration for the evaluator + * @param {LLMConfiguration} configuration.llm - The configuration for the + * language model to be prompted + * @param {string} configuration.promt - Template for the prompt to evaluate + * the system response. Variables: + * - `{{x}}`: A property `x` of the configuration for the evaluator + * - `{{variables.simulation}}`: The entire {@link Simulation} + * - `{{variables.userTurn}}`: The specific user turn, especially with + * `variables.userTurn.utterance` and + * `variables.userTurn.SystemResponse.utterance` +* @param {Array} [configuration.requiredKeys] - The properties + * that the language model's response must have (in addition to + * {@link EVALUATION_RESULT.SCORE}) + * @param {Logbook} log - A function that takes log messages + */ +export class PromptedEvaluator extends Evaluator { + + constructor(configuration, logbook) { + super(configuration); + } + + async evaluate(simulation, userTurnIndex, logbook) { + if (userTurnIndex === undefined) { + return null; // does not evaluate overall simulation + } + + const llm = new LLM(this.configuration.llm, logbook); + + const context = Object.assign({variables:{}}, this.configuration); + context.variables.simulation = simulation; + context.variables.userTurn = simulation.userTurns[userTurnIndex]; + const messages = + [ 
llm.createUserMessage(render(this.configuration.prompt, context)) ]; + + const requiredKeys = (this.configuration.requiredKeys || []) + .concat([ EVALUATION_RESULT.SCORE ]); + + return await llm.json(messages, "prompting", requiredKeys); + } + +} + diff --git a/src/evaluators/readability-evaluator.js b/src/evaluators/readability-evaluator.js index c07297f..83c5385 100644 --- a/src/evaluators/readability-evaluator.js +++ b/src/evaluators/readability-evaluator.js @@ -341,7 +341,7 @@ export class ReadabilityEvaluator extends Evaluator { async evaluate(simulation, userTurnIndex, logbook) { if (userTurnIndex === undefined) { - return null; // only evaluates last response + return null; // does not evaluate overall simulation } const result = calculateReadability(simulation.userTurns[userTurnIndex][USER_TURN.SYSTEM_RESPONSE][SYSTEM_RESPONSE.UTTERANCE]); diff --git a/src/index.js b/src/index.js index 5ce0cff..7fbe4a4 100644 --- a/src/index.js +++ b/src/index.js @@ -12,8 +12,10 @@ const systems = { GenerativeElasticSystem } +import { PromptedEvaluator } from "./evaluators/prompted-evaluator.js"; import { ReadabilityEvaluator } from "./evaluators/readability-evaluator.js"; const evaluators = { + PromptedEvaluator, ReadabilityEvaluator } @@ -168,7 +170,6 @@ export async function evaluate(simulation, configuration, options = undefined) { const logCallback = (options || {}).logCallback || defaultLogCallback; const additionalEvaluators = (options || {}).additionalEvaluators || {}; const controllerLogbook = new Logbook("controller", logCallback); - controllerLogbook.log("evaluate"); const logbook = new Logbook("evaluation", logCallback); const availableEvaluators = @@ -184,15 +185,16 @@ export async function evaluate(simulation, configuration, options = undefined) { const userTurnsEvaluations = []; for (let userTurnIndex = 0; userTurnIndex < simulation.userTurns.length; userTurnIndex += 1) { - logbook.log("turn " + userTurnIndex); + controllerLogbook.log("evaluate turn " + 
userTurnIndex); if (simulation.userTurns[userTurnIndex].systemResponse !== undefined) { const evaluations = await evaluateTurn(instantiatedEvaluators, logbook, simulation, userTurnIndex); userTurnsEvaluations.push(evaluations); } } - logbook.log("overall"); + controllerLogbook.log("evaluate overall simulation"); const overallEvaluations = await evaluateTurn(instantiatedEvaluators, logbook, simulation); + controllerLogbook.log("done"); return { configuration: configuration, simulation: simulation, diff --git a/src/llm.js b/src/llm.js index ef2f407..ae94ae3 100644 --- a/src/llm.js +++ b/src/llm.js @@ -64,6 +64,7 @@ async function generate(messages, configuration, logbook, action) { } function parseJson(message, requiredKeys) { + let failed = 0; let processedMessage = message .trim() .replace(/^```(json)?/, "") @@ -72,7 +73,36 @@ function parseJson(message, requiredKeys) { try { return JSON.parse(processedMessage); } catch (error) { - console.error("Invalid json (after 1): " + processedMessage); + failed += 1; + console.error("Invalid json (after " + failed + "): " + processedMessage); + } + + /* + * Something else after the JSON + */ + processedMessage = processedMessage.replaceAll( + /}\n\n.*/g, + '}' + ); + try { + return JSON.parse(processedMessage); + } catch (error) { + failed += 1; + console.error("Invalid json (after " + failed + "): " + processedMessage); + } + + /* + * - key="foo": "bar", + */ + processedMessage = processedMessage.replaceAll( + /-?\s*['"]?key['"]?\s*[=]\s*['"]([^'"]*)['"]\s*:/g, + '"$1":' + ); + try { + return JSON.parse(processedMessage); + } catch (error) { + failed += 1; + console.error("Invalid json (after " + failed + "): " + processedMessage); } /* @@ -82,13 +112,14 @@ function parseJson(message, requiredKeys) { * } */ processedMessage = processedMessage.replaceAll( - /(- )?['"]?key['"]?\s*[:=]\s*['"]([^'"]*)['"],\s*['"]?value['"]?\s*[:=]\s*/g, - '"$2":' + /-?\s*?['"]?key['"]?\s*[:=]\s*['"]([^'"]*)['"],\s*['"]?value['"]?\s*[:=]\s*/g, + 
'"$1":' ); try { return JSON.parse(processedMessage); } catch (error) { - console.error("Invalid json (after 2): " + processedMessage); + failed += 1; + console.error("Invalid json (after " + failed + "): " + processedMessage); } /* @@ -103,7 +134,8 @@ function parseJson(message, requiredKeys) { try { return JSON.parse(processedMessage); } catch (error) { - console.error("Invalid json (after 3): " + processedMessage); + failed += 1; + console.error("Invalid json (after " + failed + "): " + processedMessage); } /* @@ -113,7 +145,8 @@ function parseJson(message, requiredKeys) { try { return JSON.parse(processedMessage); } catch (error) { - console.error("Invalid json (after 4): " + processedMessage); + failed += 1; + console.error("Invalid json (after " + failed + "): " + processedMessage); throw error; } } diff --git a/src/systems/generative-elastic-system.js b/src/systems/generative-elastic-system.js index 784156f..fc10213 100644 --- a/src/systems/generative-elastic-system.js +++ b/src/systems/generative-elastic-system.js @@ -71,7 +71,8 @@ async function queryElastic(query, searchConfiguration, logbook) { * @param {Array} [configuration.generation.searchResultKeys] - The properties * of each result that are used to render the result in the generation message * @param {Array} [configuration.generation.requiredKeys] - The properties - * that the generation response must have (at least `utterance`) + * that the generated response must have (in addition to + * {@link SYSTEM_RESPONSE.UTTERANCE}) * @param {Logbook} log - A function that takes log messages */ export class GenerativeElasticSystem extends System { diff --git a/src/templates.js b/src/templates.js index 34a1b95..a625ed4 100644 --- a/src/templates.js +++ b/src/templates.js @@ -46,7 +46,7 @@ export function render(text, context, options = undefined) { for (let p = 0; p < path.length; p += 1) { scope = scope[path[p]]; if (scope === undefined) { - if (options.ignoreMissing) { + if (options !== undefined && 
options.ignoreMissing) { output += "{{" + path.join(".") + "}}"; continue matchloop; } else { diff --git a/static/configurations/discussion-configuration.json b/static/configurations/discussion-configuration.json new file mode 100644 index 0000000..7b7e4c1 --- /dev/null +++ b/static/configurations/discussion-configuration.json @@ -0,0 +1,106 @@ +{ + "simulation": { + "topic": { + "description": "Television is bad for people." + }, + "user": { + "class": "StaticUser", + "llm": { + "url": "https://llm.srv.webis.de/api/chat", + "model": "default", + "keep_alive": "24h" + }, + "start": "You try to convice someone that {{variables.topic.description}}. Write them a message in about 20 words that states your point of view.\n\nFormat your message as JSON with exactly one key, 'utterance', that has your message.\n\n", + "followUp": "Someone told you: '{{variables.systemResponse.utterance}}'\n\nBut you want to convince them that {{variables.topic.description}}. Follow up on their argument by writing them a message in about 20 words that counters their argument. 
Format your message as JSON with exactly these keys:\n- key='reasoning': A step-by-step explanation of why you think your message is a good counter to their argument.\n- key='utterance': Your message.\n\n" + }, + "system": { + "class": "GenerativeElasticSystem", + "llm": { + "url": "https://llm.srv.webis.de/api/chat", + "model": "default", + "keep_alive": "24h" + }, + "search": { + "url": "http://localhost:9200/kialo/", + "query": { + "match": { + "claim": { + "query": "{{variables.userTurn.utterance}}" + } + } + }, + "size": 5 + }, + "generation": { + "message": "\n\nIn order to counter the argument '{{variables.userTurn.utterance}}' you found the following counters:\n\n{{variables.results}}\n\nSelect the most convicing counter to their original argument from these counters and respond to them in about 20 words using your selected counter.\n\nFormat your message as JSON with exactly these keys:\n- key='reasoning': A step-by-step explanation of why you selected the specific counter.\n- key='counter': The text of the counter you selected.\n- key='utterance': Your message.\n\n", + "searchResultKeys": [ + "counter" + ] + } + }, + "maxTurns": 3 + }, + "evaluation": { + "evaluators": { + "Readability": { + "class": "ReadabilityEvaluator", + "measure": "fleschKincaidGrade" + }, + "Clarity": { + "class": "PromptedEvaluator", + "llm": { + "url": "https://llm.srv.webis.de/api/chat", + "model": "default", + "keep_alive": "24h" + }, + "prompt": "### Issue:\n{{variables.simulation.configuration.topic.description}}\n\n### Argument:\n{{variables.userTurn.utterance}}\n\n### Counter-argument:\n{{variables.userTurn.systemResponse.utterance}}\n\n### Definition of {{dimension.name}}:\n{{dimension.definition}}\n\nOn a scale from 0 (extremely bad) to 1 (extremely good), how would you rate the {{dimension.name}} of the counter-argument?\n\nFormat your message as JSON with exactly these keys:\n- key='reasoning': A step-by-step explanation of your score.\n- key='score': The score as 
number between 0 and 1.\n\n", + "dimension": { + "name": "Clarity", + "definition": "The author uses clear, grammatically correct and unambiguous language. The author sticks to the main topic and does not make things overly complicated." + } + }, + "Clarity": { + "class": "PromptedEvaluator", + "llm": { + "url": "https://llm.srv.webis.de/api/chat", + "model": "default", + "keep_alive": "24h" + }, + "prompt": "### Issue:\n{{variables.simulation.configuration.topic.description}}\n\n### Argument:\n{{variables.userTurn.utterance}}\n\n### Counter-argument:\n{{variables.userTurn.systemResponse.utterance}}\n\n### Definition of {{dimension.name}}:\n{{dimension.definition}}\n\nOn a scale from 0 (extremely bad) to 1 (extremely good), how would you rate the {{dimension.name}} of the counter-argument?\n\nFormat your message as JSON with exactly these keys:\n- key='explanation': A step-by-step explanation of your score.\n- key='score': The score as number between 0 and 1.\n\n", + "requiredKeys": [ "explanation" ], + "dimension": { + "name": "Clarity", + "definition": "The author uses clear, grammatically correct and unambiguous language. The author sticks to the main topic and does not make things overly complicated." 
+ } + }, + "Global Relevance": { + "class": "PromptedEvaluator", + "llm": { + "url": "https://llm.srv.webis.de/api/chat", + "model": "default", + "keep_alive": "24h" + }, + "prompt": "### Issue:\n{{variables.simulation.configuration.topic.description}}\n\n### Argument:\n{{variables.userTurn.utterance}}\n\n### Counter-argument:\n{{variables.userTurn.systemResponse.utterance}}\n\n### Definition of {{dimension.name}}:\n{{dimension.definition}}\n\nOn a scale from 0 (extremely bad) to 1 (extremely good), how would you rate the {{dimension.name}} of the counter-argument?\n\nFormat your message as JSON with exactly these keys:\n- key='explanation': A step-by-step explanation of your score.\n- key='score': The score as number between 0 and 1.\n\n", + "requiredKeys": [ "explanation" ], + "dimension": { + "name": "Global Relevance", + "definition": "The counter-argument (assuming it is true), is relevant for resolving a discussion around the issue." + } + }, + "Local Relevance": { + "class": "PromptedEvaluator", + "llm": { + "url": "https://llm.srv.webis.de/api/chat", + "model": "default", + "keep_alive": "24h" + }, + "prompt": "### Issue:\n{{variables.simulation.configuration.topic.description}}\n\n### Argument:\n{{variables.userTurn.utterance}}\n\n### Counter-argument:\n{{variables.userTurn.systemResponse.utterance}}\n\n### Definition of {{dimension.name}}:\n{{dimension.definition}}\n\nOn a scale from 0 (extremely bad) to 1 (extremely good), how would you rate the {{dimension.name}} of the counter-argument?\n\nFormat your message as JSON with exactly these keys:\n- key='explanation': A step-by-step explanation for your score.\n- key='score': The score as number between 0 and 1.\n\n", + "requiredKeys": [ "explanation" ], + "dimension": { + "name": "Local Relevance", + "definition": "The counter-argument (assuming it is true), is relevant to the argument: it tells why one could refute the argument." 
+ } + } + } + } +} diff --git a/static/css/main.css b/static/css/main.css index 2dcbb18..1eeb73f 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -32,6 +32,7 @@ main .pane { } main #simulation { min-width: 400px; + flex-shrink: 0.5; } main .pane > .area { flex-grow: 1; @@ -87,6 +88,7 @@ details.empty summary::marker { .buttons { text-align: right; margin-top: 10px; + margin-bottom: 10px; } .bubble .buttons { text-align: center; diff --git a/static/css/messages.css b/static/css/messages.css index 62898a6..c6200b4 100644 --- a/static/css/messages.css +++ b/static/css/messages.css @@ -11,4 +11,7 @@ #messages details:not(.empty) > div { padding-bottom: 10px; } +#messages details[data-source="controller"] { + margin-top: 20px; +} diff --git a/static/js/configuration.js b/static/js/configuration.js index 08804b7..e8bea96 100644 --- a/static/js/configuration.js +++ b/static/js/configuration.js @@ -79,7 +79,7 @@ export function loadFromFile(fileElement) { event.stopPropagation(); event.preventDefault(); dropZone.classList.remove("active"); - loadConfigurationFromFile(event.dataTransfer); + loadFromFile(event.dataTransfer); }, false); } diff --git a/static/js/simulation.js b/static/js/simulation.js index d962907..8c53e8d 100644 --- a/static/js/simulation.js +++ b/static/js/simulation.js @@ -15,7 +15,11 @@ function log(logEntry) { logContainerParentElement.setAttribute("data-source", logEntry.source); logContainerParentElement.setAttribute("data-action", logEntry.action); const logContainerTitleElement = document.createElement("summary"); - logContainerTitleElement.textContent = logEntry.source + ": " + logEntry.action; + if (logEntry.source === "controller") { + logContainerTitleElement.textContent = logEntry.action; + } else { + logContainerTitleElement.textContent = logEntry.source + ": " + logEntry.action; + } logContainerParentElement.appendChild(logContainerTitleElement); logContainerElement = document.createElement("div"); 
logContainerParentElement.appendChild(logContainerElement); @@ -149,11 +153,32 @@ export async function run(configuration) { && data.result.score !== undefined) { scoreBadge.innerText = data.result.score.toFixed(2); if (data.result.explanation !== undefined) { - scoreBadge.setAttribute("title", data.result.explanation); + if (typeof(data.result.explanation) === "string") { + scoreBadge.setAttribute("title", data.result.explanation); + } else { + scoreBadge.setAttribute("title", + JSON.stringify(data.result.explanation)); + } } } } } else if (message.overallEvaluations) { // final result + const chatBubble = ensureChatBubble("controller"); + const loaderElement = chatBubble.querySelector(".loader"); + if (loaderElement !== null) { + loaderElement.parentElement.removeChild(loaderElement); + } + + const buttonsElement = document.createElement("div"); + buttonsElement.classList.add("buttons"); + const buttonElement = document.createElement("a"); + const data = encodeURIComponent(JSON.stringify(message, null, 2)); + buttonElement.textContent = "download"; + buttonElement.setAttribute("href", "data:text/json;charset=utf8," + data); + const date = new Date().toJSON().replaceAll(/[:.]/g, "-"); + buttonElement.setAttribute("download", "genirsim-evaluation-" + date + ".json"); + buttonsElement.appendChild(buttonElement); + chatBubble.appendChild(buttonsElement); console.log(message); } else { // something else => error console.error(message);