-
Notifications
You must be signed in to change notification settings - Fork 0
/
llm-evaluator.tsx
223 lines (209 loc) · 7.95 KB
/
llm-evaluator.tsx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"use client";
import { createAndRunEval } from "@/app/actions";
import { Button } from "@/components/ui/button";
import { Label } from "@/components/ui/label";
import { RadioGroup } from "@/components/ui/radio-group";
import { Textarea } from "@/components/ui/textarea";
import { cn } from "@/lib/utils";
import type {
evaluateAnthropic,
evaluateXAI,
evaluateOpenAI,
summarizeEvals,
LLMProviders,
} from "@/trigger/batch";
import { useRealtimeRunsWithTag } from "@trigger.dev/react-hooks";
import { Sparkles } from "lucide-react";
import { useState } from "react";
import AnthropicEval from "@/components/evals/Anthropic";
import XIAEval from "@/components/evals/XAI";
import OpenAIEval from "@/components/evals/OpenAI";
/**
 * Client-side page that sends a prompt to `createAndRunEval`, subscribes to the
 * realtime runs tagged `eval:<evaluationId>`, renders one result card per
 * provider run, and lets the user pick which provider answered best.
 *
 * The realtime subscription stays disabled until an evaluation id is known.
 */
export default function LLMEvaluator() {
  const [prompt, setPrompt] = useState("");
  const [selectedModel, setSelectedModel] = useState<LLMProviders | undefined>(
    undefined
  );
  const [isLoading, setIsLoading] = useState(false);
  const [isSubmitted, setIsSubmitted] = useState(false);
  const [accessToken, setAccessToken] = useState<string | undefined>(undefined);
  const [evaluationId, setEvaluationId] = useState<string | undefined>(
    undefined
  );
  // Message shown under the form when starting an evaluation fails.
  const [submitError, setSubmitError] = useState<string | undefined>(undefined);

  const { runs } = useRealtimeRunsWithTag<
    | typeof evaluateAnthropic
    | typeof evaluateXAI
    | typeof evaluateOpenAI
    | typeof summarizeEvals
  >(`eval:${evaluationId}`, {
    // Do not subscribe until an evaluation has actually been created.
    enabled: !!evaluationId,
    accessToken,
    baseURL: process.env.NEXT_PUBLIC_TRIGGER_API_URL,
  });

  /**
   * Starts a new evaluation for the current prompt and stores the returned
   * evaluation id and access token so the realtime subscription can begin.
   * Any previous selection/submission state is cleared first.
   */
  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setIsLoading(true);
    setSelectedModel(undefined);
    setIsSubmitted(false);
    setSubmitError(undefined);

    try {
      const { evaluation, accessToken: newAccessToken } =
        await createAndRunEval(prompt);
      setAccessToken(newAccessToken);
      setEvaluationId(evaluation.id);
    } catch (error: unknown) {
      // Surface the failure instead of leaving an unhandled rejection.
      console.error("Failed to create and run evaluation:", error);
      setSubmitError("Failed to start the evaluation. Please try again.");
    } finally {
      // Always re-enable the form, even when the request failed; otherwise the
      // submit button would stay disabled in its "Evaluating..." state.
      setIsLoading(false);
    }
  };

  /**
   * Locks in the user's choice. Currently this only flips local state and
   * logs the choice to the console; nothing is sent anywhere.
   */
  const handleSubmitEvaluation = () => {
    setIsSubmitted(true);
    console.log(
      `Submitted evaluation: ${selectedModel} was chosen as the best model.`
    );
  };

  // The summary run's output supplies the per-provider `tag` shown on each card.
  const summarizeEvalsRun = runs.find(
    (run) => run.taskIdentifier === "summarize-evals"
  );

  return (
    <div className="min-h-screen bg-black text-white">
      {/* Gradient background */}
      <div className="absolute inset-0 bg-gradient-to-tr from-gray-900 via-gray-900/90 to-gray-900/80" />
      <div className="relative">
        {/* Header with gradient text */}
        <div className="border-b border-gray-800">
          <div className="container mx-auto px-4 py-6">
            <h1 className="text-3xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-gray-100 to-gray-400">
              LLM Evaluator
            </h1>
          </div>
        </div>
        {/* Main content */}
        <div className="container mx-auto px-4 py-8 space-y-8">
          {/* Prompt input section */}
          <form onSubmit={handleSubmit} className="space-y-4 max-w-2xl">
            <div className="space-y-2">
              <Label
                htmlFor="prompt"
                className="text-sm font-medium text-gray-300"
              >
                Enter your prompt:
              </Label>
              <div className="relative">
                <Textarea
                  id="prompt"
                  value={prompt}
                  onChange={(e) => setPrompt(e.target.value)}
                  className="min-h-[100px] bg-gray-900/50 backdrop-blur-sm border-gray-800 focus:border-gray-700 focus:ring-gray-700 resize-none"
                  placeholder="Type your prompt here..."
                  required
                />
                <div className="absolute inset-0 rounded-md pointer-events-none bg-gradient-to-tr from-gray-800/5 via-transparent to-transparent" />
              </div>
            </div>
            <Button
              type="submit"
              disabled={isLoading}
              className={cn(
                "w-full sm:w-auto transition-all duration-300",
                "bg-gradient-to-r from-gray-800 to-gray-900 hover:from-gray-700 hover:to-gray-800",
                "border border-gray-700 hover:border-gray-600",
                "text-gray-100 shadow-lg",
                "disabled:opacity-50 disabled:cursor-not-allowed"
              )}
            >
              {isLoading ? (
                <div className="flex items-center gap-2">
                  <Sparkles className="w-4 h-4 animate-pulse" />
                  <span>Evaluating...</span>
                </div>
              ) : (
                "Evaluate"
              )}
            </Button>
            {submitError && (
              <p role="alert" className="text-sm text-red-400">
                {submitError}
              </p>
            )}
          </form>
          {/* Results section */}
          {runs.length > 0 && (
            <div className="space-y-4">
              <h2 className="text-xl font-semibold text-gray-200">Results:</h2>
              <RadioGroup
                value={selectedModel}
                onValueChange={(value) =>
                  setSelectedModel(value as LLMProviders)
                }
                disabled={isSubmitted}
                className="grid grid-cols-1 md:grid-cols-3 gap-4"
              >
                {/*
                  `accessToken!` below: the subscription is only enabled once
                  `evaluationId` is set, and handleSubmit stores the access
                  token immediately before storing that id.
                */}
                {runs.map((run) => {
                  switch (run.taskIdentifier) {
                    case "eval-anthropic": {
                      return (
                        <AnthropicEval
                          key={run.id}
                          run={run}
                          accessToken={accessToken!}
                          isSelected={selectedModel === "anthropic"}
                          tag={summarizeEvalsRun?.output?.anthropic}
                        />
                      );
                    }
                    case "eval-xai": {
                      return (
                        <XIAEval
                          key={run.id}
                          run={run}
                          accessToken={accessToken!}
                          isSelected={selectedModel === "xai"}
                          tag={summarizeEvalsRun?.output?.xai}
                        />
                      );
                    }
                    case "eval-openai": {
                      return (
                        <OpenAIEval
                          key={run.id}
                          run={run}
                          accessToken={accessToken!}
                          isSelected={selectedModel === "openai"}
                          tag={summarizeEvalsRun?.output?.openai}
                        />
                      );
                    }
                    default: {
                      // Other runs (e.g. "summarize-evals") have no card of
                      // their own; render nothing for them.
                      return null;
                    }
                  }
                })}
              </RadioGroup>
              {/* Evaluation submission */}
              <div className="flex flex-col items-center gap-4 pt-4">
                {selectedModel && !isSubmitted && (
                  <>
                    <p className="text-sm text-gray-400">
                      You selected{" "}
                      <span className="font-medium text-gray-200">
                        {selectedModel}
                      </span>{" "}
                      as the best model.
                    </p>
                    <Button
                      onClick={handleSubmitEvaluation}
                      className={cn(
                        "transition-all duration-300",
                        "bg-gradient-to-r from-gray-800 to-gray-900 hover:from-gray-700 hover:to-gray-800",
                        "border border-gray-700 hover:border-gray-600",
                        "text-gray-100 shadow-lg"
                      )}
                    >
                      Submit Evaluation
                    </Button>
                  </>
                )}
                {isSubmitted && (
                  <div className="flex items-center gap-2 text-sm text-emerald-400">
                    <Sparkles className="w-4 h-4" />
                    <p>
                      Evaluation submitted successfully! You chose{" "}
                      <span className="font-medium">{selectedModel}</span> as
                      the best model.
                    </p>
                  </div>
                )}
              </div>
            </div>
          )}
        </div>
      </div>
    </div>
  );
}