Add vertex ai summarization metrics

firebase · May 2, 2024 · f8f991d · f8f991d
1 parent 5a6fa82
commit f8f991d
Showing 1 changed file with 140 additions and 9 deletions.
diff --git a/js/plugins/vertexai/src/evaluation.ts b/js/plugins/vertexai/src/evaluation.ts
@@ -10,7 +10,7 @@
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
+ * See the License for the specix c language governing permissions and
  * limitations under the License.
  */
 
@@ -31,6 +31,9 @@ export enum VertexAIEvaluationMetricType {
   FLUENCY = 'FLEUNCY',
   SAFETY = 'SAFETY',
   GROUNDEDNESS = 'GROUNDEDNESS',
+  SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY',
+  SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS',
+  SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY',
 }
 
 /**
@@ -75,6 +78,15 @@ export function vertexEvaluators(
       case VertexAIEvaluationMetricType.GROUNDEDNESS: {
         return createGroundednessEvaluator(factory, metricSpec);
       }
+      case VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY: {
+        return createSummarizationQualityEvaluator(factory, metricSpec);
+      }
+      case VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS: {
+        return createSummarizationHelpfulnessEvaluator(factory, metricSpec);
+      }
+      case VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY: {
+        return createSummarizationVerbosityEvaluator(factory, metricSpec);
+      }
     }
   });
 }
@@ -105,10 +117,6 @@ function createBleuEvaluator(
       responseSchema: BleuResponseSchema,
     },
     (datapoint) => {
-      if (!datapoint.reference) {
-        throw new Error('Reference is required');
-      }
-
       return {
         bleuInput: {
           metricSpec,
@@ -149,10 +157,6 @@ function createRougeEvaluator(
       responseSchema: RougeResponseSchema,
     },
     (datapoint) => {
-      if (!datapoint.reference) {
-        throw new Error('Reference is required');
-      }
-
       return {
         rougeInput: {
           metricSpec,
@@ -292,3 +296,130 @@ function createGroundednessEvaluator(
     }
   );
 }
+
+const SummarizationQualityResponseSchema = z.object({
+  summarizationQualityResult: z.object({
+    score: z.number(),
+    explanation: z.string(),
+    confidence: z.number(),
+  }),
+});
+
+function createSummarizationQualityEvaluator(
+  factory: EvaluatorFactory,
+  metricSpec: any
+): Action {
+  return factory.create(
+    {
+      metric: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY,
+      displayName: 'Summarization quality',
+      definition: 'Assesses the overall ability to summarize text',
+      responseSchema: SummarizationQualityResponseSchema,
+    },
+    (datapoint) => {
+      return {
+        summarizationQualityInput: {
+          metricSpec,
+          instance: {
+            prediction: datapoint.output as string,
+            instruction: datapoint.input as string,
+            context: datapoint.context?.join('. '),
+          },
+        },
+      };
+    },
+    (response) => {
+      return {
+        score: response.summarizationQualityResult.score,
+        details: {
+          reasoning: response.summarizationQualityResult.explanation,
+        },
+      };
+    }
+  );
+}
+
+const SummarizationHelpfulnessResponseSchema = z.object({
+  summarizationHelpfulnessResult: z.object({
+    score: z.number(),
+    explanation: z.string(),
+    confidence: z.number(),
+  }),
+});
+
+function createSummarizationHelpfulnessEvaluator(
+  factory: EvaluatorFactory,
+  metricSpec: any
+): Action {
+  return factory.create(
+    {
+      metric: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS,
+      displayName: 'Summarization helpfulness',
+      definition:
+        'Assesses the ability to provide a summarization, which contains the details necessary to substitute the original text',
+      responseSchema: SummarizationHelpfulnessResponseSchema,
+    },
+    (datapoint) => {
+      return {
+        summarizationHelpfulnessInput: {
+          metricSpec,
+          instance: {
+            prediction: datapoint.output as string,
+            instruction: datapoint.input as string,
+            context: datapoint.context?.join('. '),
+          },
+        },
+      };
+    },
+    (response) => {
+      return {
+        score: response.summarizationHelpfulnessResult.score,
+        details: {
+          reasoning: response.summarizationHelpfulnessResult.explanation,
+        },
+      };
+    }
+  );
+}
+
+const SummarizationVerbositySchema = z.object({
+  summarizationVerbosityResult: z.object({
+    score: z.number(),
+    explanation: z.string(),
+    confidence: z.number(),
+  }),
+});
+
+function createSummarizationVerbosityEvaluator(
+  factory: EvaluatorFactory,
+  metricSpec: any
+): Action {
+  return factory.create(
+    {
+      metric: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY,
+      displayName: 'Summarization verbosity',
+      definition: 'Aassess the ability to provide a succinct summarization',
+      responseSchema: SummarizationVerbositySchema,
+    },
+    (datapoint) => {
+      return {
+        summarizationVerbosityInput: {
+          metricSpec,
+          instance: {
+            prediction: datapoint.output as string,
+            instruction: datapoint.input as string,
+            context: datapoint.context?.join('. '),
+          },
+        },
+      };
+    },
+    (response) => {
+      return {
+        score: response.summarizationVerbosityResult.score,
+        details: {
+          reasoning: response.summarizationVerbosityResult.explanation,
+        },
+      };
+    }
+  );
+}