evals: Fix bug that prevented multiple turns from displaying (#34128)

Release Notes:

- N/A
This commit is contained in:
Oleksiy Syvokon 2025-07-09 18:31:58 +03:00 committed by GitHub
parent a9b82e1e57
commit b9b42bee99
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -324,20 +324,8 @@
<body> <body>
<h1 id="current-filename">Thread Explorer</h1> <h1 id="current-filename">Thread Explorer</h1>
<div class="view-switcher"> <div class="view-switcher">
<button <button id="full-view" class="view-button active" onclick="switchView('full')">Full View</button>
id="full-view" <button id="compact-view" class="view-button" onclick="switchView('compact')">Compact View</button>
class="view-button active"
onclick="switchView('full')"
>
Full View
</button>
<button
id="compact-view"
class="view-button"
onclick="switchView('compact')"
>
Compact View
</button>
<button <button
id="export-button" id="export-button"
class="view-button" class="view-button"
@ -347,11 +335,7 @@
Export Export
</button> </button>
<div class="theme-switcher"> <div class="theme-switcher">
<button <button id="theme-toggle" class="theme-button" onclick="toggleTheme()">
id="theme-toggle"
class="theme-button"
onclick="toggleTheme()"
>
<span id="theme-icon" class="theme-icon">☀️</span> <span id="theme-icon" class="theme-icon">☀️</span>
<span id="theme-text">Light</span> <span id="theme-text">Light</span>
</button> </button>
@ -368,8 +352,7 @@
&larr; Previous &larr; Previous
</button> </button>
<div class="thread-indicator"> <div class="thread-indicator">
Thread <span id="current-thread-index">1</span> of Thread <span id="current-thread-index">1</span> of <span id="total-threads">1</span>:
<span id="total-threads">1</span>:
<span id="thread-id">Default Thread</span> <span id="thread-id">Default Thread</span>
</div> </div>
<button <button
@ -423,9 +406,7 @@
function toggleTheme() { function toggleTheme() {
// If currently system or light, switch to dark // If currently system or light, switch to dark
if (themeMode === "system") { if (themeMode === "system") {
const systemDark = window.matchMedia( const systemDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
"(prefers-color-scheme: dark)",
).matches;
themeMode = systemDark ? "light" : "dark"; themeMode = systemDark ? "light" : "dark";
} else { } else {
themeMode = themeMode === "light" ? "dark" : "light"; themeMode = themeMode === "light" ? "dark" : "light";
@ -442,19 +423,15 @@
function initTheme() { function initTheme() {
if (themeMode === "system") { if (themeMode === "system") {
// Use system preference // Use system preference
const systemDark = window.matchMedia( const systemDark = window.matchMedia("(prefers-color-scheme: dark)").matches;
"(prefers-color-scheme: dark)",
).matches;
applyTheme(systemDark ? "dark" : "light"); applyTheme(systemDark ? "dark" : "light");
// Listen for system theme changes // Listen for system theme changes
window window.matchMedia("(prefers-color-scheme: dark)").addEventListener("change", (e) => {
.matchMedia("(prefers-color-scheme: dark)") if (themeMode === "system") {
.addEventListener("change", (e) => { applyTheme(e.matches ? "dark" : "light");
if (themeMode === "system") { }
applyTheme(e.matches ? "dark" : "light"); });
}
});
} else { } else {
// Use saved preference // Use saved preference
applyTheme(themeMode); applyTheme(themeMode);
@ -466,49 +443,38 @@
viewMode = mode; viewMode = mode;
// Update button states // Update button states
document document.getElementById("full-view").classList.toggle("active", mode === "full");
.getElementById("full-view") document.getElementById("compact-view").classList.toggle("active", mode === "compact");
.classList.toggle("active", mode === "full");
document
.getElementById("compact-view")
.classList.toggle("active", mode === "compact");
// Add or remove compact-mode class on the body // Add or remove compact-mode class on the body
document.body.classList.toggle( document.body.classList.toggle("compact-mode", mode === "compact");
"compact-mode",
mode === "compact",
);
// Re-render the thread with the new view mode // Re-render the thread with the new view mode
renderThread(); renderThread();
} }
// Function to export the current thread as a JSON file // Function to export the current thread as a JSON file
function exportThreadAsJson() { function exportThreadAsJson() {
// Clone the thread to avoid modifying the original // Clone the thread to avoid modifying the original
const threadToExport = JSON.parse(JSON.stringify(thread)); const threadToExport = JSON.parse(JSON.stringify(thread));
// Create a Blob with the JSON data // Create a Blob with the JSON data
const blob = new Blob( const blob = new Blob([JSON.stringify(threadToExport, null, 2)], { type: "application/json" });
[JSON.stringify(threadToExport, null, 2)],
{ type: "application/json" }
);
// Create a download link // Create a download link
const url = URL.createObjectURL(blob); const url = URL.createObjectURL(blob);
const a = document.createElement("a"); const a = document.createElement("a");
a.href = url; a.href = url;
// Generate filename based on thread ID or index // Generate filename based on thread ID or index
const filename = threadToExport.thread_id || const filename =
threadToExport.filename || threadToExport.thread_id || threadToExport.filename || `thread-${currentThreadIndex + 1}.json`;
`thread-${currentThreadIndex + 1}.json`;
a.download = filename.endsWith(".json") ? filename : `${filename}.json`; a.download = filename.endsWith(".json") ? filename : `${filename}.json`;
// Trigger the download // Trigger the download
document.body.appendChild(a); document.body.appendChild(a);
a.click(); a.click();
// Clean up // Clean up
setTimeout(() => { setTimeout(() => {
document.body.removeChild(a); document.body.removeChild(a);
@ -524,9 +490,7 @@
}, },
{ {
role: "user", role: "user",
content: [ content: [{ Text: "Fix the bug: kwargs not passed..." }],
{ Text: "Fix the bug: kwargs not passed..." },
],
}, },
{ {
role: "assistant", role: "assistant",
@ -593,12 +557,9 @@
name: "edit_file", name: "edit_file",
input: { input: {
path: "fastmcp/core.py", path: "fastmcp/core.py",
old_string: old_string: "def start_server(app):\n anyio.run(app)",
"def start_server(app):\n anyio.run(app)", new_string: "def start_server(app, **kwargs):\n anyio.run(app, **kwargs)",
new_string: display_description: "Fix kwargs passing to anyio.run",
"def start_server(app, **kwargs):\n anyio.run(app, **kwargs)",
display_description:
"Fix kwargs passing to anyio.run",
}, },
is_input_complete: true, is_input_complete: true,
}, },
@ -681,14 +642,10 @@
// Function to update the navigation buttons state // Function to update the navigation buttons state
function updateNavigationButtons() { function updateNavigationButtons() {
document.getElementById("prev-thread").disabled = document.getElementById("prev-thread").disabled = currentThreadIndex <= 0;
currentThreadIndex <= 0; document.getElementById("next-thread").disabled = currentThreadIndex >= threads.length - 1;
document.getElementById("next-thread").disabled = document.getElementById("current-thread-index").textContent = currentThreadIndex + 1;
currentThreadIndex >= threads.length - 1; document.getElementById("total-threads").textContent = threads.length;
document.getElementById("current-thread-index").textContent =
currentThreadIndex + 1;
document.getElementById("total-threads").textContent =
threads.length;
} }
function renderThread() { function renderThread() {
@ -696,20 +653,15 @@
tbody.innerHTML = ""; // Clear existing content tbody.innerHTML = ""; // Clear existing content
// Set thread name if available // Set thread name if available
const threadId = const threadId = thread.thread_id || `Thread ${currentThreadIndex + 1}`;
thread.thread_id || `Thread ${currentThreadIndex + 1}`;
document.getElementById("thread-id").textContent = threadId; document.getElementById("thread-id").textContent = threadId;
// Set filename in the header if available // Set filename in the header if available
const filename = const filename = thread.filename || `Thread ${currentThreadIndex + 1}`;
thread.filename || `Thread ${currentThreadIndex + 1}`; document.getElementById("current-filename").textContent = filename;
document.getElementById("current-filename").textContent =
filename;
// Skip system message // Skip system message
const nonSystemMessages = thread.messages.filter( const nonSystemMessages = thread.messages.filter((msg) => msg.role !== "system");
(msg) => msg.role !== "system",
);
let turnNumber = 0; let turnNumber = 0;
processMessages(nonSystemMessages, tbody, turnNumber); processMessages(nonSystemMessages, tbody, turnNumber);
@ -737,9 +689,7 @@
for (const content of msg.content) { for (const content of msg.content) {
if (content.hasOwnProperty("Text")) { if (content.hasOwnProperty("Text")) {
if (assistantText) { if (assistantText) {
assistantText += assistantText += "<br><br>" + formatContent(content.Text);
"<br><br>" +
formatContent(content.Text);
} else { } else {
assistantText = formatContent(content.Text); assistantText = formatContent(content.Text);
} }
@ -763,49 +713,33 @@
tbody.appendChild(row); tbody.appendChild(row);
// Add all tool calls to the tools cell // Add all tool calls to the tools cell
const toolsCell = document.getElementById( const toolsCell = document.getElementById(`tools-${turnNumber}`);
`tools-${turnNumber}`, const resultsCell = document.getElementById(`results-${turnNumber}`);
);
const resultsCell = document.getElementById(
`results-${turnNumber}`,
);
// Process all tools and their results // Process all tools and their results
for (let j = 0; j < toolUses.length; j++) { for (let j = 0; j < toolUses.length; j++) {
const toolUse = toolUses[j]; const toolUse = toolUses[j];
const toolCall = formatToolCall( const toolCall = formatToolCall(toolUse.name, toolUse.input);
toolUse.name,
toolUse.input,
);
// Add the tool call to the tools cell // Add the tool call to the tools cell
if (j > 0) toolsCell.innerHTML += "<hr>"; if (j > 0) toolsCell.innerHTML += "<hr>";
toolsCell.innerHTML += toolCall; toolsCell.innerHTML += toolCall;
// Look for corresponding tool result // Look for corresponding tool result
if ( if (hasMatchingToolResult(messages, i, toolUse.name)) {
hasMatchingToolResult(messages, i, toolUse.name)
) {
const resultMsg = messages[i + 1]; const resultMsg = messages[i + 1];
const toolResult = findToolResult( const toolResult = findToolResult(resultMsg, toolUse.name);
resultMsg,
toolUse.name,
);
if (toolResult) { if (toolResult) {
// Add the result to the results cell // Add the result to the results cell
if (j > 0) resultsCell.innerHTML += "<hr>"; if (j > 0) resultsCell.innerHTML += "<hr>";
// Create a container for the result // Create a container for the result
const resultDiv = const resultDiv = document.createElement("div");
document.createElement("div");
resultDiv.className = "tool-result"; resultDiv.className = "tool-result";
// Format and display the tool result // Format and display the tool result
formatToolResultInline( formatToolResultInline(toolResult.content.Text, resultDiv);
toolResult.content,
resultDiv,
);
resultsCell.appendChild(resultDiv); resultsCell.appendChild(resultDiv);
// Skip the result message in the next iteration // Skip the result message in the next iteration
@ -815,10 +749,7 @@
} }
} }
} }
} else if ( } else if (msg.role === "user" && msg.content.some((c) => c.hasOwnProperty("ToolResult"))) {
msg.role === "user" &&
msg.content.some((c) => c.hasOwnProperty("ToolResult"))
) {
// Skip tool result messages as they are handled with their corresponding tool use // Skip tool result messages as they are handled with their corresponding tool use
continue; continue;
} }
@ -826,10 +757,7 @@
} }
function isUserQuery(message) { function isUserQuery(message) {
return ( return message.role === "user" && !message.content.some((c) => c.hasOwnProperty("ToolResult"));
message.role === "user" &&
!message.content.some((c) => c.hasOwnProperty("ToolResult"))
);
} }
function renderUserMessage(message, turnNumber, tbody) { function renderUserMessage(message, turnNumber, tbody) {
@ -848,18 +776,14 @@
currentIndex + 1 < messages.length && currentIndex + 1 < messages.length &&
messages[currentIndex + 1].role === "user" && messages[currentIndex + 1].role === "user" &&
messages[currentIndex + 1].content.some( messages[currentIndex + 1].content.some(
(c) => (c) => c.hasOwnProperty("ToolResult") && c.ToolResult.tool_name === toolName,
c.hasOwnProperty("ToolResult") &&
c.ToolResult.tool_name === toolName,
) )
); );
} }
function findToolResult(resultMessage, toolName) { function findToolResult(resultMessage, toolName) {
const toolResultContent = resultMessage.content.find( const toolResultContent = resultMessage.content.find(
(c) => (c) => c.hasOwnProperty("ToolResult") && c.ToolResult.tool_name === toolName,
c.hasOwnProperty("ToolResult") &&
c.ToolResult.tool_name === toolName,
); );
return toolResultContent ? toolResultContent.ToolResult : null; return toolResultContent ? toolResultContent.ToolResult : null;
@ -874,18 +798,12 @@
for (const [key, value] of Object.entries(input)) { for (const [key, value] of Object.entries(input)) {
if (value !== null && value !== undefined) { if (value !== null && value !== undefined) {
// Store full parameter for expanded view // Store full parameter for expanded view
let fullValue = let fullValue = typeof value === "string" ? `"${value}"` : value;
typeof value === "string"
? `"${value}"`
: value;
fullParams.push([key, fullValue]); fullParams.push([key, fullValue]);
// Abbreviated value for compact view // Abbreviated value for compact view
let displayValue = fullValue; let displayValue = fullValue;
if ( if (typeof value === "string" && value.length > 30) {
typeof value === "string" &&
value.length > 30
) {
displayValue = `"${value.substring(0, 30)}..."`; displayValue = `"${value.substring(0, 30)}..."`;
} }
params.push(`${key}=${displayValue}`); params.push(`${key}=${displayValue}`);
@ -903,10 +821,7 @@
// For the full view, use the original untruncated values // For the full view, use the original untruncated values
let result = `<span class="tool-name">${name}</span>(`; let result = `<span class="tool-name">${name}</span>(`;
const formattedParams = fullParams const formattedParams = fullParams
.map( .map((p) => `&nbsp;&nbsp;&nbsp;&nbsp;${p[0]}=${p[1]}`)
(p) =>
`&nbsp;&nbsp;&nbsp;&nbsp;${p[0]}=${p[1]}`,
)
.join(",<br/>"); .join(",<br/>");
const fullView = `${result}<br/>${formattedParams}<br/>)`; const fullView = `${result}<br/>${formattedParams}<br/>)`;
@ -925,8 +840,7 @@
for (const [key, value] of Object.entries(input)) { for (const [key, value] of Object.entries(input)) {
if (value !== null && value !== undefined) { if (value !== null && value !== undefined) {
// Format different types of values // Format different types of values
let formattedValue = let formattedValue = typeof value === "string" ? `"${value}"` : value;
typeof value === "string" ? `"${value}"` : value;
params.push([key, formattedValue]); params.push([key, formattedValue]);
} }
} }
@ -938,9 +852,7 @@
return `${result}${params[0][1]})`; return `${result}${params[0][1]})`;
} else { } else {
// Format parameters // Format parameters
const formattedParams = params const formattedParams = params.map((p) => `&nbsp;&nbsp;&nbsp;&nbsp;${p[0]}=${p[1]}`).join(",<br/>");
.map((p) => `&nbsp;&nbsp;&nbsp;&nbsp;${p[0]}=${p[1]}`)
.join(",<br/>");
return `${result}<br/>${formattedParams}<br/>)`; return `${result}<br/>${formattedParams}<br/>)`;
} }
} }
@ -1013,21 +925,13 @@
// Keyboard navigation handler // Keyboard navigation handler
document.addEventListener("keydown", function (event) { document.addEventListener("keydown", function (event) {
// previous thread // previous thread
if ( if ((event.ctrlKey && event.key === "ArrowLeft") || event.key === "h" || event.key === "k") {
(event.ctrlKey && event.key === "ArrowLeft") ||
event.key === "h" ||
event.key === "k"
) {
if (!document.getElementById("prev-thread").disabled) { if (!document.getElementById("prev-thread").disabled) {
previousThread(); previousThread();
} }
} }
// next thread // next thread
else if ( else if ((event.ctrlKey && event.key === "ArrowRight") || event.key === "j" || event.key === "l") {
(event.ctrlKey && event.key === "ArrowRight") ||
event.key === "j" ||
event.key === "l"
) {
if (!document.getElementById("next-thread").disabled) { if (!document.getElementById("next-thread").disabled) {
nextThread(); nextThread();
} }