update

webis-de · Jun 19, 2024 · cdd671b · cdd671b
1 parent f01e348
commit cdd671b
Show file tree

Hide file tree

Showing 9 changed files with 46 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -1,15 +1,17 @@
+![webis logo](./docs/img/webis-logo-gray.png "Logo of the Webis network") [![genirsim logo](./docs/img/genirsim-logo.png "Logo of GenIRSim: Generated by Midjourney")](./docs/img/genirsim-logo.png)
+
 # GenIRSim
 
 Quickstart:
 ```
 npm install
-npm exec simulate-search configurations/default-configuration.json > eval.json
+npm exec genirsim configurations/default-configuration.json > eval.json
 ```
 
 To run the web server:
 ```
-npm install ws
-npm exec simulation-server
+npm install
+npm exec genirsim-server
 ```
 
 

diff --git a/bin/simulation-server.js → bin/genirsim-server.js b/bin/simulation-server.js → bin/genirsim-server.js
diff --git a/bin/simulate.js → bin/genirsim.js b/bin/simulate.js → bin/genirsim.js
diff --git a/docs/img/genirsim-logo.png b/docs/img/genirsim-logo.png
diff --git a/docs/img/webis-logo-gray.png b/docs/img/webis-logo-gray.png
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -8,8 +8,8 @@
     "src"
   ],
   "bin": {
-    "simulate": "bin/simulate.js",
-    "simulation-server": "bin/simulation-server.js"
+    "genirsim": "bin/genirsim.js",
+    "genirsim-server": "bin/genirsim-server.js"
   },
   "scripts": {
     "doc": "documentation build -f html -o docs/ --github --config docs/config.yml"

diff --git a/src/index.js b/src/index.js
@@ -134,6 +134,9 @@ async function evaluateTurn(instantiatedEvaluators, logbook, simulation, userTur
     const evaluation =
       await evaluator.evaluate(simulation, userTurnIndex, evaluatorLogbook);
     if (evaluation !== null) {
+      if (typeof(evaluation.score) === "string") {
+        evaluation.score = parseInt(evaluation.score);
+      }
       if (userTurnIndex !== undefined) {
         logbook.log(turnName + " result",
           {userTurnIndex, evaluator: name, result: evaluation});

diff --git a/static/configurations/default-configuration.json b/static/configurations/default-configuration.json
@@ -10,9 +10,9 @@
         "model": "default",
         "keep_alive": "24h"
       },
-      "start": "You have the following task: {{variables.topic.description}}\n\n{{formatting}}\n\n",
-      "followUp": "{{variables.systemResponse.utterance}}\n\nFollow up on the response above. {{formatting}}\n\n",
-      "formatting": "Format your message as JSON with exactly these keys:\n- key='expectation': A description of what you expect the system's answer to the utterance to contain.\n- key='utterance': The short utterance you would send to the conversational search system to solve your task."
+      "start": "### Task:\n{{variables.topic.description}}\n\nNow write a message for a conversational search system to request the information you are lacking.\n\n{{formatting}}\n\n",
+      "followUp": "### Task:\n{{variables.topic.description}}\n\n### Information:\n{{variables.systemResponse.utterance}}\n\n### Instruction: Follow-up questions are the questions elicited from readers as they naturally read through text. Given the information, write a follow-up question that you would ask if you were reading this information for the first time.\n\n{{formatting}}\n\n",
+      "formatting": "Format your message as JSON with exactly these keys:\n- key='expectation': A description of what you expect the system's answer to your message to contain.\n- key='utterance': Your short message."
     },
     "system": {
       "class": "GenerativeElasticSystem",
@@ -45,9 +45,39 @@
   },
   "evaluation": {
     "evaluators": {
-      "Readability": {
+      "Simplicity": {
         "class": "ReadabilityEvaluator",
         "measure": "fleschKincaidGrade"
+      },
+      "Topic Relevance": {
+        "class": "PromptedEvaluator",
+        "llm": {
+          "url": "https://llm.srv.webis.de/api/chat",
+          "model": "default",
+          "keep_alive": "24h"
+        },
+        "prompt": "### Task:\n{{variables.simulation.configuration.topic.description}}\n\n### Response:\n{{variables.userTurn.systemResponse.utterance}}\n\nScore from 0 (extremely bad) to 1 (extremely good) the topic relevance of the response: whether the response is relevant for fulfilling the task.\n\nFormat your message as JSON with exactly these keys:\n- key='explanation': A brief explanation of how you got to the score\n- key='score': The score you give between 0 (response not relevant to topic) and 1 (response completely fulfills the task).",
+        "requiredKeys": [ "explanation" ]
+      },
+      "Request Relevance": {
+        "class": "PromptedEvaluator",
+        "llm": {
+          "url": "https://llm.srv.webis.de/api/chat",
+          "model": "default",
+          "keep_alive": "24h"
+        },
+        "prompt": "### Request:\n{{variables.userTurn.utterance}}\n\n### Response:\n{{variables.userTurn.systemResponse.utterance}}\n\nScore from 0 (extremely bad) to 1 (extremely good) the request relevance of the response: whether the response is relevant for answering the request.\n\nFormat your message as JSON with exactly these keys:\n- key='explanation': A brief explanation of how you got to the score\n- key='score': The score you give between 0 (response not relevant to request) and 1 (response completely answers the request).",
+        "requiredKeys": [ "explanation" ]
+      },
+      "Expectation Match": {
+        "class": "PromptedEvaluator",
+        "llm": {
+          "url": "https://llm.srv.webis.de/api/chat",
+          "model": "default",
+          "keep_alive": "24h"
+        },
+        "prompt": "### Expectation:\n{{variables.userTurn.expectation}}\n\n### Response:\n{{variables.userTurn.systemResponse.utterance}}\n\nScore from 0 (extremely bad) to 1 (extremely good) the expectation match of the response: whether the response matches the expectation.\n\nFormat your message as JSON with exactly these keys:\n- key='explanation': A brief explanation of how you got to the score\n- key='score': The score you give between 0 (no connection between response and expectation) and 1 (response completely matches the expectation).",
+        "requiredKeys": [ "explanation" ]
       }
     }
   }