Skip to content

Commit

Permalink
Add vertex ai summarization metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
tagboola committed May 2, 2024
1 parent 5a6fa82 commit f8f991d
Showing 1 changed file with 140 additions and 9 deletions.
149 changes: 140 additions & 9 deletions js/plugins/vertexai/src/evaluation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* See the License for the specific language governing permissions and
* limitations under the License.
*/

Expand All @@ -31,6 +31,9 @@ export enum VertexAIEvaluationMetricType {
FLUENCY = 'FLEUNCY',
SAFETY = 'SAFETY',
GROUNDEDNESS = 'GROUNDEDNESS',
SUMMARIZATION_QUALITY = 'SUMMARIZATION_QUALITY',
SUMMARIZATION_HELPFULNESS = 'SUMMARIZATION_HELPFULNESS',
SUMMARIZATION_VERBOSITY = 'SUMMARIZATION_VERBOSITY',
}

/**
Expand Down Expand Up @@ -75,6 +78,15 @@ export function vertexEvaluators(
case VertexAIEvaluationMetricType.GROUNDEDNESS: {
return createGroundednessEvaluator(factory, metricSpec);
}
case VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY: {
return createSummarizationQualityEvaluator(factory, metricSpec);
}
case VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS: {
return createSummarizationHelpfulnessEvaluator(factory, metricSpec);
}
case VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY: {
return createSummarizationVerbosityEvaluator(factory, metricSpec);
}
}
});
}
Expand Down Expand Up @@ -105,10 +117,6 @@ function createBleuEvaluator(
responseSchema: BleuResponseSchema,
},
(datapoint) => {
if (!datapoint.reference) {
throw new Error('Reference is required');
}

return {
bleuInput: {
metricSpec,
Expand Down Expand Up @@ -149,10 +157,6 @@ function createRougeEvaluator(
responseSchema: RougeResponseSchema,
},
(datapoint) => {
if (!datapoint.reference) {
throw new Error('Reference is required');
}

return {
rougeInput: {
metricSpec,
Expand Down Expand Up @@ -292,3 +296,130 @@ function createGroundednessEvaluator(
}
);
}

/**
 * Zod schema for the response handled by the summarization-quality evaluator.
 *
 * The payload is expected to carry a single `summarizationQualityResult`
 * object with a numeric `score`, a textual `explanation`, and a numeric
 * `confidence`.
 */
const SummarizationQualityResponseSchema = z.object({
summarizationQualityResult: z.object({
score: z.number(),
explanation: z.string(),
// Required by the schema; the evaluator defined alongside this schema reads
// only `score` and `explanation`.
confidence: z.number(),
}),
});

/**
 * Creates the evaluator action for the `SUMMARIZATION_QUALITY` metric.
 *
 * The request is derived from a datapoint: `output` is sent as `prediction`,
 * `input` as `instruction`, and the `context` entries (when present) are
 * joined with '. ' into a single string. From the response, `score` is
 * returned unchanged and `explanation` is exposed as `details.reasoning`.
 *
 * @param factory - Factory whose `create` method assembles the evaluator.
 * @param metricSpec - Metric configuration forwarded unchanged in the request.
 * @returns The action produced by `factory.create`.
 */
function createSummarizationQualityEvaluator(
  factory: EvaluatorFactory,
  metricSpec: any
): Action {
  return factory.create(
    {
      metric: VertexAIEvaluationMetricType.SUMMARIZATION_QUALITY,
      displayName: 'Summarization quality',
      definition: 'Assesses the overall ability to summarize text',
      responseSchema: SummarizationQualityResponseSchema,
    },
    // Datapoint -> request payload.
    ({ output, input, context }) => ({
      summarizationQualityInput: {
        metricSpec,
        instance: {
          prediction: output as string,
          instruction: input as string,
          // `context` may be absent, in which case this stays undefined.
          context: context?.join('. '),
        },
      },
    }),
    // Response -> score plus the model's explanation.
    ({ summarizationQualityResult: { score, explanation } }) => ({
      score,
      details: { reasoning: explanation },
    })
  );
}

/**
 * Zod schema for the response handled by the summarization-helpfulness
 * evaluator.
 *
 * The payload is expected to carry a single `summarizationHelpfulnessResult`
 * object with a numeric `score`, a textual `explanation`, and a numeric
 * `confidence`.
 */
const SummarizationHelpfulnessResponseSchema = z.object({
summarizationHelpfulnessResult: z.object({
score: z.number(),
explanation: z.string(),
// Required by the schema; the evaluator defined alongside this schema reads
// only `score` and `explanation`.
confidence: z.number(),
}),
});

/**
 * Creates the evaluator action for the `SUMMARIZATION_HELPFULNESS` metric.
 *
 * Each datapoint is turned into a `summarizationHelpfulnessInput` request:
 * `output` becomes `prediction`, `input` becomes `instruction`, and any
 * `context` entries are joined with '. '. The response's `score` is passed
 * through and its `explanation` is reported as `details.reasoning`.
 *
 * @param factory - Factory whose `create` method assembles the evaluator.
 * @param metricSpec - Metric configuration forwarded unchanged in the request.
 * @returns The action produced by `factory.create`.
 */
function createSummarizationHelpfulnessEvaluator(
  factory: EvaluatorFactory,
  metricSpec: any
): Action {
  return factory.create(
    {
      metric: VertexAIEvaluationMetricType.SUMMARIZATION_HELPFULNESS,
      displayName: 'Summarization helpfulness',
      definition:
        'Assesses the ability to provide a summarization, which contains the details necessary to substitute the original text',
      responseSchema: SummarizationHelpfulnessResponseSchema,
    },
    (datapoint) => {
      // Assemble the instance first so the request wrapper stays compact.
      const instance = {
        prediction: datapoint.output as string,
        instruction: datapoint.input as string,
        context: datapoint.context?.join('. '),
      };
      return { summarizationHelpfulnessInput: { metricSpec, instance } };
    },
    (response) => {
      const result = response.summarizationHelpfulnessResult;
      return {
        score: result.score,
        details: { reasoning: result.explanation },
      };
    }
  );
}

/**
 * Zod schema for the response handled by the summarization-verbosity
 * evaluator.
 *
 * The payload is expected to carry a single `summarizationVerbosityResult`
 * object with a numeric `score`, a textual `explanation`, and a numeric
 * `confidence`.
 *
 * NOTE(review): the sibling schemas in this file are named
 * `*ResponseSchema`; this one omits `Response` — consider aligning the name.
 */
const SummarizationVerbositySchema = z.object({
summarizationVerbosityResult: z.object({
score: z.number(),
explanation: z.string(),
// Required by the schema; the evaluator defined alongside this schema reads
// only `score` and `explanation`.
confidence: z.number(),
}),
});

/**
 * Creates the evaluator action for the `SUMMARIZATION_VERBOSITY` metric.
 *
 * Each datapoint is turned into a `summarizationVerbosityInput` request:
 * `output` becomes `prediction`, `input` becomes `instruction`, and any
 * `context` entries are joined with '. ' into a single string. The response's
 * `score` is passed through and its `explanation` is reported as
 * `details.reasoning`.
 *
 * @param factory - Factory whose `create` method assembles the evaluator.
 * @param metricSpec - Metric configuration forwarded unchanged in the request.
 * @returns The action produced by `factory.create`.
 */
function createSummarizationVerbosityEvaluator(
  factory: EvaluatorFactory,
  metricSpec: any
): Action {
  return factory.create(
    {
      metric: VertexAIEvaluationMetricType.SUMMARIZATION_VERBOSITY,
      displayName: 'Summarization verbosity',
      // Fixed typo ("Aassess") and aligned the wording with the other
      // summarization metric definitions.
      definition: 'Assesses the ability to provide a succinct summarization',
      responseSchema: SummarizationVerbositySchema,
    },
    (datapoint) => {
      return {
        summarizationVerbosityInput: {
          metricSpec,
          instance: {
            prediction: datapoint.output as string,
            instruction: datapoint.input as string,
            // `context` may be absent, in which case this stays undefined.
            context: datapoint.context?.join('. '),
          },
        },
      };
    },
    (response) => {
      return {
        score: response.summarizationVerbosityResult.score,
        details: {
          reasoning: response.summarizationVerbosityResult.explanation,
        },
      };
    }
  );
}

0 comments on commit f8f991d

Please sign in to comment.