import { Laminar, LaminarClient, observe } from "@lmnr-ai/lmnr";
import { OpenAI } from "openai";
// Placeholder project key used by both the Laminar SDK and the Laminar API
// client below; replace it with a real key before running.
const projectApiKey = 'your_project_api_key';

// Set up Laminar, passing the OpenAI class under `instrumentModules`.
Laminar.initialize({
  projectApiKey,
  instrumentModules: { openAI: OpenAI },
});

// API client used further down to create the evaluation and its datapoints.
const client = new LaminarClient({ projectApiKey });

// OpenAI client used by the executor; the key is a placeholder as well.
const openai = new OpenAI({ apiKey: 'your_openai_api_key' });
/**
 * Asks the chat model for the capital of the test case's country, inside an
 * 'executor' span whose input is the test case data.
 *
 * @param {{ data: { country: string } }} testCase - Test case to execute.
 * @returns {Promise<string>} The reply text of the first choice, or '' when
 *   that reply is empty or null.
 */
const executeTestCase = async (testCase) => {
  const spanOptions = {
    name: 'executor',
    spanType: 'EXECUTOR',
    input: testCase.data,
  };

  const reply = await observe(spanOptions, async () => {
    const { country } = testCase.data;
    const prompt = `What is the capital of ${country}? Answer only with the capital, no other text.`;

    const completion = await openai.chat.completions.create({
      model: 'gpt-4o-mini',
      messages: [{ role: 'user', content: prompt }],
    });

    // Fall back to an empty string when the reply text is empty or null.
    const text = completion.choices[0].message.content;
    return text || '';
  });

  return reply;
};
/**
 * Scores an output against its target inside an 'accuracy' evaluator span.
 *
 * @param {string} output - Text produced by the executor.
 * @param {string} target - Expected answer.
 * @returns {Promise<number>} 1 when `target` is non-empty and contained in
 *   `output`, otherwise 0.
 */
const accuracy = async (output, target) => {
  const spanOptions = {
    name: 'accuracy',
    spanType: 'EVALUATOR',
    input: { output, target },
  };

  const score = await observe(spanOptions, async () => {
    // A missing target short-circuits, so `output` is never inspected then.
    if (target && output.includes(target)) {
      return 1;
    }
    return 0;
  });

  return score;
};
/**
 * Runs the manual "capital of country" evaluation end to end.
 *
 * Creates an evaluation through `client.evals.create`, then processes each
 * test case sequentially inside its own 'evaluation' span: a datapoint is
 * registered with the current trace id, the executor and the accuracy
 * evaluator are run, and the datapoint is updated with the score and the
 * executor output.
 *
 * Failures are logged to `console.error` and not rethrown. `Laminar.flush()`
 * is attempted on both the success and the failure path.
 *
 * @returns {Promise<void>} Resolves once the run and the flush have finished.
 */
async function runEvaluation() {
  try {
    const testData = [
      {
        data: { country: 'France' },
        target: 'Paris',
      },
      {
        data: { country: 'Germany' },
        target: 'Berlin',
      },
    ];

    const evalId = await client.evals.create({
      name: "Capital of Country Manual Eval",
      groupName: "Manual API - Capital Cities",
    });

    for (const [index, testCase] of testData.entries()) {
      await observe(
        { name: 'evaluation', spanType: 'EVALUATION', input: { testCase } },
        async () => {
          // Save datapoint first to associate trace as evaluation type
          const datapointId = await client.evals.createDatapoint({
            evalId,
            data: testCase.data,
            target: testCase.target,
            index,
            // Must be called within span context
            traceId: Laminar.getTraceId(),
          });

          const output = await executeTestCase(testCase);
          const accuracyScore = await accuracy(output, testCase.target);

          await client.evals.updateDatapoint({
            evalId,
            datapointId,
            scores: { accuracy: accuracyScore },
            executorOutput: {
              response: output,
              model: 'gpt-4o-mini',
              country: testCase.data.country,
            },
          });
        },
      );
    }
  } catch (error) {
    console.error("Error:", error.message);
  } finally {
    // Flush on the failure path too, so a mid-run error does not skip it.
    // A flush failure is logged the same way instead of escaping as an
    // unhandled rejection.
    try {
      await Laminar.flush();
    } catch (flushError) {
      console.error("Error:", flushError.message);
    }
  }
}
// Start the evaluation run. runEvaluation catches and logs its own errors,
// so the returned promise is intentionally not awaited here.
void runEvaluation();