설치형 환경에서 Code 탭 작업이 오래 걸릴 때 첫 도구 호출 전에 정체되는 구간을 추적할 수 있도록 StreamingToolExecutionCoordinator에 대기 heartbeat와 첫 응답 수신 로그를 추가했다. 첫 응답 전에는 모델 요청 시작, 응답 대기 시간, 첫 응답 수신 시점을 AgentLoopWait 로그와 Thinking 이벤트로 남기고, 이후 응답 지연도 heartbeat로 표시하도록 조정했다. 함께 StreamingToolExecutionCoordinatorTests를 추가해 첫 응답 지연 시 heartbeat가 노출되는 경로와 빠른 응답 시 불필요한 heartbeat가 생기지 않는 경로를 고정했다. README.md와 docs/DEVELOPMENT.md에 2026-04-15 14:55 (KST) 기준 이력을 반영했고, dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\\verify_llm_wait_diag\\ -p:IntermediateOutputPath=obj\\verify_llm_wait_diag\\ 경고 0/오류 0, dotnet test src/AxCopilot.Tests/AxCopilot.Tests.csproj -c Release -v minimal --filter "StreamingToolExecutionCoordinatorTests|AgentLoopLlmRequestPreparationServiceTests|AgentLoopIterationPreparationServiceTests" -p:OutputPath=bin\\verify_llm_wait_diag_tests\\ -p:IntermediateOutputPath=obj\\verify_llm_wait_diag_tests\\ 통과 6을 확인했다.
233 lines
10 KiB
C#
233 lines
10 KiB
C#
using System.Collections.Generic;
|
|
using System.Diagnostics;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using AxCopilot.Models;
|
|
using AxCopilot.Services;
|
|
|
|
namespace AxCopilot.Services.Agent;
|
|
|
|
/// <summary>
/// Sends LLM requests that carry tool definitions, retrying on context-overflow and
/// transient errors, and reports wait heartbeats while a streaming response is pending.
/// Also offers early ("prefetch") execution of a fixed set of read-only tools.
/// </summary>
internal sealed class StreamingToolExecutionCoordinator : IToolExecutionCoordinator
{
    /// <summary>
    /// Tool names (case-insensitive) that <see cref="TryPrefetchReadOnlyToolAsync"/> is allowed to run early.
    /// </summary>
    private static readonly HashSet<string> PrefetchableReadOnlyTools = new(StringComparer.OrdinalIgnoreCase)
    {
        "file_read", "document_read",
        "env_tool", "datetime_tool",
        "dev_env_detect", "memory", "json_tool", "regex_tool", "base64_tool",
        "hash_tool", "image_analyze"
    };

    /// <summary>
    /// Shared empty JSON object (<c>{}</c>) used when a tool call carries no input.
    /// Cloned once so that no <see cref="JsonDocument"/> is left undisposed per call.
    /// </summary>
    private static readonly JsonElement EmptyJsonObject = CreateEmptyJsonObject();

    private readonly ILlmService _llm;
    private readonly Func<string, IReadOnlyCollection<string>, string> _resolveRequestedToolName;
    private readonly Func<string, JsonElement, AgentContext, List<ChatMessage>?, CancellationToken, Task<ToolResult>> _executeToolAsync;
    private readonly Action<AgentEventType, string, string> _emitEvent;
    private readonly Func<string?, bool> _isContextOverflowError;
    private readonly Func<List<ChatMessage>, bool> _forceContextRecovery;
    private readonly Func<Exception, bool> _isTransientLlmError;
    private readonly Func<int, Exception, int> _computeTransientBackoffDelayMs;
    private readonly TimeSpan _firstResponseHeartbeatDelay;
    private readonly TimeSpan _responseHeartbeatInterval;

    /// <summary>
    /// Creates a coordinator from the LLM service and the callbacks it delegates to.
    /// </summary>
    /// <param name="llm">LLM service used for both the non-streaming and the streaming request path.</param>
    /// <param name="resolveRequestedToolName">Maps a requested tool name onto one of the active tool names.</param>
    /// <param name="executeToolAsync">Executes a tool by name with the given JSON input and context.</param>
    /// <param name="emitEvent">Event sink; called with the event type, a tool name (empty when not tool-specific) and a message.</param>
    /// <param name="isContextOverflowError">Decides from an exception message whether the failure is a context overflow.</param>
    /// <param name="forceContextRecovery">Attempts to recover the message list; a <c>true</c> result allows a retry.</param>
    /// <param name="isTransientLlmError">Decides whether an exception is transient and therefore retryable.</param>
    /// <param name="computeTransientBackoffDelayMs">Computes the backoff in milliseconds from the retry count and the exception.</param>
    /// <param name="firstResponseHeartbeatDelay">Time to wait before the first heartbeat; defaults to 8 seconds.</param>
    /// <param name="responseHeartbeatInterval">Interval between subsequent heartbeats; defaults to 15 seconds.</param>
    public StreamingToolExecutionCoordinator(
        ILlmService llm,
        Func<string, IReadOnlyCollection<string>, string> resolveRequestedToolName,
        Func<string, JsonElement, AgentContext, List<ChatMessage>?, CancellationToken, Task<ToolResult>> executeToolAsync,
        Action<AgentEventType, string, string> emitEvent,
        Func<string?, bool> isContextOverflowError,
        Func<List<ChatMessage>, bool> forceContextRecovery,
        Func<Exception, bool> isTransientLlmError,
        Func<int, Exception, int> computeTransientBackoffDelayMs,
        TimeSpan? firstResponseHeartbeatDelay = null,
        TimeSpan? responseHeartbeatInterval = null)
    {
        _llm = llm;
        _resolveRequestedToolName = resolveRequestedToolName;
        _executeToolAsync = executeToolAsync;
        _emitEvent = emitEvent;
        _isContextOverflowError = isContextOverflowError;
        _forceContextRecovery = forceContextRecovery;
        _isTransientLlmError = isTransientLlmError;
        _computeTransientBackoffDelayMs = computeTransientBackoffDelayMs;
        _firstResponseHeartbeatDelay = firstResponseHeartbeatDelay ?? TimeSpan.FromSeconds(8);
        _responseHeartbeatInterval = responseHeartbeatInterval ?? TimeSpan.FromSeconds(15);
    }

    /// <summary>
    /// Runs the tool requested by <paramref name="block"/> early when it resolves to one of the
    /// prefetchable read-only tools.
    /// </summary>
    /// <param name="block">Tool-call block; its <c>ResolvedToolName</c> is always overwritten with the resolved name.</param>
    /// <param name="tools">Currently active tools; their names are offered to the name resolver.</param>
    /// <param name="context">Agent context forwarded to the tool executor.</param>
    /// <param name="ct">Cancellation token forwarded to the tool executor.</param>
    /// <returns>
    /// <c>null</c> when the resolved tool is not prefetchable; otherwise the tool result (or a failure
    /// result when execution threw) together with the elapsed milliseconds and the resolved name.
    /// </returns>
    public async Task<ToolPrefetchResult?> TryPrefetchReadOnlyToolAsync(
        ContentBlock block,
        IReadOnlyCollection<IAgentTool> tools,
        AgentContext context,
        CancellationToken ct)
    {
        var activeToolNames = tools.Select(t => t.Name).Distinct(StringComparer.OrdinalIgnoreCase).ToList();
        var resolvedToolName = _resolveRequestedToolName(block.ToolName, activeToolNames);
        block.ResolvedToolName = resolvedToolName;

        if (!PrefetchableReadOnlyTools.Contains(resolvedToolName))
            return null;

        _emitEvent(
            AgentEventType.Thinking,
            resolvedToolName,
            $"읽기 도구 조기 실행 준비: {resolvedToolName}");

        var sw = Stopwatch.StartNew();
        try
        {
            // A missing input is treated as an empty JSON object.
            var input = block.ToolInput ?? EmptyJsonObject;
            var result = await _executeToolAsync(resolvedToolName, input, context, null, ct);
            sw.Stop();
            return new ToolPrefetchResult(result, sw.ElapsedMilliseconds, resolvedToolName);
        }
        catch (Exception ex)
        {
            // Every exception is converted into a failed result instead of propagating.
            // NOTE(review): this includes cancellation raised by the executor — confirm that is intended.
            sw.Stop();
            return new ToolPrefetchResult(
                ToolResult.Fail($"조기 실행 오류: {ex.Message}"),
                sw.ElapsedMilliseconds,
                resolvedToolName);
        }
    }

    /// <summary>
    /// Sends the request to the LLM and returns the resulting content blocks, retrying up to two times
    /// after a successful context recovery and up to three times on transient errors.
    /// </summary>
    /// <param name="messages">Conversation messages; also handed to the context-recovery callback.</param>
    /// <param name="tools">Tools offered to the model.</param>
    /// <param name="ct">Cancellation token for the request, heartbeat waits and backoff delays.</param>
    /// <param name="phaseLabel">Label prefixed to log lines and emitted events.</param>
    /// <param name="runState">Optional run state that seeds the retry counters and receives their updated values.</param>
    /// <param name="forceToolCall">Forwarded to the LLM service.</param>
    /// <param name="prefetchToolCallAsync">Optional prefetch callback forwarded to the LLM service.</param>
    /// <param name="onStreamEventAsync">
    /// When <c>null</c> the non-streaming request path is used. Otherwise the streaming path is used and
    /// every stream event is forwarded to this callback; a <c>RetryReset</c> event is sent before a retry
    /// if partial text or tool calls had already been streamed.
    /// </param>
    /// <returns>
    /// For the streaming path: an optional leading text block (trimmed concatenation of all text deltas)
    /// followed by the tool-call blocks in arrival order. For the non-streaming path: the service result.
    /// </returns>
    public async Task<List<ContentBlock>> SendWithToolsWithRecoveryAsync(
        List<ChatMessage> messages,
        IReadOnlyCollection<IAgentTool> tools,
        CancellationToken ct,
        string phaseLabel,
        AgentLoopService.RunState? runState = null,
        bool forceToolCall = false,
        Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync = null,
        Func<ToolStreamEvent, Task>? onStreamEventAsync = null)
    {
        var transientRetries = runState?.TransientLlmErrorRetries ?? 0;
        var contextRecoveryRetries = runState?.ContextRecoveryAttempts ?? 0;

        while (true)
        {
            // Tracks whether the consumer has already seen partial output for this attempt,
            // so that a retry can tell it to discard that output.
            var streamedAnyPartialState = false;
            try
            {
                if (onStreamEventAsync == null)
                    return await _llm.SendWithToolsAsync(messages, tools, ct, forceToolCall, prefetchToolCallAsync);

                var blocks = new List<ContentBlock>();
                var textBuilder = new StringBuilder();
                var (service, model) = _llm.GetCurrentModelInfo();
                LogService.Info(
                    $"[AgentLoopWait] {phaseLabel}: LLM 요청 시작 (service={service}, model={model}, messages={messages.Count}, tools={tools.Count}, forceToolCall={forceToolCall})");
                _emitEvent(AgentEventType.Thinking, "", $"{phaseLabel}: 모델에 요청하는 중입니다...");

                // The heartbeat deadline is expressed as an offset on this stopwatch, which starts at
                // request time and is never restarted.
                // NOTE(review): because the deadline is not reset when events arrive, a heartbeat can
                // fire while the stream is actively delivering events — confirm that is intended.
                var waitStopwatch = Stopwatch.StartNew();
                var firstEventReceived = false;
                var nextHeartbeatAt = _firstResponseHeartbeatDelay;

                await using var stream = _llm
                    .StreamWithToolsAsync(messages, tools, forceToolCall, prefetchToolCallAsync, ct)
                    .GetAsyncEnumerator(ct);

                while (true)
                {
                    var moveNextTask = stream.MoveNextAsync().AsTask();
                    nextHeartbeatAt = await EmitHeartbeatsUntilCompletedAsync(
                        moveNextTask,
                        waitStopwatch,
                        nextHeartbeatAt,
                        phaseLabel,
                        firstEventReceived,
                        ct).ConfigureAwait(false);

                    if (!await moveNextTask.ConfigureAwait(false))
                        break;

                    var evt = stream.Current;
                    if (!firstEventReceived)
                    {
                        firstEventReceived = true;
                        LogService.Info(
                            $"[AgentLoopWait] {phaseLabel}: 첫 응답 수신 ({waitStopwatch.ElapsedMilliseconds}ms, kind={evt.Kind})");
                        // Only announce the first response when the wait was long enough to have
                        // produced a "waiting" heartbeat.
                        if (waitStopwatch.Elapsed >= _firstResponseHeartbeatDelay)
                            _emitEvent(AgentEventType.Thinking, "", $"{phaseLabel}: 모델 첫 응답을 받아 계속 진행합니다.");
                    }

                    await onStreamEventAsync(evt);

                    // Whitespace-only deltas (spaces, line breaks) are part of the text and must be
                    // kept; only empty deltas are skipped.
                    if (evt.Kind == ToolStreamEventKind.TextDelta && !string.IsNullOrEmpty(evt.Text))
                    {
                        streamedAnyPartialState = true;
                        textBuilder.Append(evt.Text);
                    }
                    else if (evt.Kind == ToolStreamEventKind.ToolCallReady && evt.ToolCall != null)
                    {
                        streamedAnyPartialState = true;
                        blocks.Add(evt.ToolCall);
                    }
                }

                var result = new List<ContentBlock>();
                var text = textBuilder.ToString().Trim();
                if (!string.IsNullOrWhiteSpace(text))
                    result.Add(new ContentBlock { Type = "text", Text = text });
                result.AddRange(blocks);
                return result;
            }
            catch (Exception ex)
            {
                if (_isContextOverflowError(ex.Message)
                    && contextRecoveryRetries < 2
                    && _forceContextRecovery(messages))
                {
                    if (onStreamEventAsync != null && streamedAnyPartialState)
                        await onStreamEventAsync(new ToolStreamEvent(ToolStreamEventKind.RetryReset, $"{phaseLabel}:retry"));
                    contextRecoveryRetries++;
                    if (runState != null)
                        runState.ContextRecoveryAttempts = contextRecoveryRetries;

                    _emitEvent(
                        AgentEventType.Thinking,
                        "",
                        $"{phaseLabel}: 컨텍스트 한도 초과로 대화를 압축한 후 재시도합니다 ({contextRecoveryRetries}/2)");
                    continue;
                }

                // Never retry once the caller has cancelled.
                if (ct.IsCancellationRequested)
                    throw;

                if (_isTransientLlmError(ex) && transientRetries < 3)
                {
                    if (onStreamEventAsync != null && streamedAnyPartialState)
                        await onStreamEventAsync(new ToolStreamEvent(ToolStreamEventKind.RetryReset, $"{phaseLabel}:retry"));
                    transientRetries++;
                    if (runState != null)
                        runState.TransientLlmErrorRetries = transientRetries;

                    // Clamp so a negative value cannot make Task.Delay throw (masking the original
                    // error) or, for -1, wait indefinitely.
                    var delayMs = Math.Max(0, _computeTransientBackoffDelayMs(transientRetries, ex));
                    _emitEvent(
                        AgentEventType.Thinking,
                        "",
                        $"{phaseLabel}: 일시적 LLM 오류로 {delayMs}ms 후 재시도합니다 ({transientRetries}/3)");
                    await Task.Delay(delayMs, ct);
                    continue;
                }

                throw;
            }
        }
    }

    /// <summary>
    /// Waits for <paramref name="pendingMoveNext"/> to complete, emitting a heartbeat each time the
    /// heartbeat deadline passes first. The task itself is not awaited here; the caller observes its result.
    /// </summary>
    /// <param name="pendingMoveNext">The in-flight <c>MoveNextAsync</c> call of the response stream.</param>
    /// <param name="waitStopwatch">Stopwatch the heartbeat deadline is measured against.</param>
    /// <param name="nextHeartbeatAt">Stopwatch offset at which the next heartbeat is due.</param>
    /// <param name="phaseLabel">Label prefixed to the heartbeat message.</param>
    /// <param name="firstEventReceived">Selects the "first response" or "ongoing response" heartbeat wording.</param>
    /// <param name="ct">Cancellation token; once cancelled, heartbeats stop and the method returns.</param>
    /// <returns>The (possibly advanced) stopwatch offset at which the next heartbeat is due.</returns>
    private async Task<TimeSpan> EmitHeartbeatsUntilCompletedAsync(
        Task<bool> pendingMoveNext,
        Stopwatch waitStopwatch,
        TimeSpan nextHeartbeatAt,
        string phaseLabel,
        bool firstEventReceived,
        CancellationToken ct)
    {
        while (!pendingMoveNext.IsCompleted)
        {
            var remaining = nextHeartbeatAt - waitStopwatch.Elapsed;
            if (remaining < TimeSpan.Zero)
                remaining = TimeSpan.Zero;

            // A linked source lets the delay timer be released as soon as the stream wins the race,
            // instead of lingering until the heartbeat interval expires.
            using var delayCts = CancellationTokenSource.CreateLinkedTokenSource(ct);
            var delayTask = Task.Delay(remaining, delayCts.Token);
            var completedTask = await Task.WhenAny(pendingMoveNext, delayTask).ConfigureAwait(false);
            if (completedTask == pendingMoveNext)
            {
                delayCts.Cancel();
                break;
            }

            // With a cancelled token every further delay completes immediately; stop here so the
            // caller awaits the pending MoveNextAsync (which received the same token) instead of
            // spinning and flooding heartbeats.
            if (ct.IsCancellationRequested)
                break;

            var waited = waitStopwatch.Elapsed;
            EmitWaitHeartbeat(phaseLabel, waited, firstEventReceived);
            nextHeartbeatAt = waited + _responseHeartbeatInterval;
        }

        return nextHeartbeatAt;
    }

    /// <summary>
    /// Logs and emits a single "still waiting" heartbeat.
    /// </summary>
    /// <param name="phaseLabel">Label prefixed to the message.</param>
    /// <param name="waited">Elapsed wait time; reported in whole seconds, at least 1.</param>
    /// <param name="firstEventReceived">
    /// <c>true</c> to report a long-running response, <c>false</c> to report waiting for the first response.
    /// </param>
    private void EmitWaitHeartbeat(string phaseLabel, TimeSpan waited, bool firstEventReceived)
    {
        var seconds = Math.Max(1, (int)Math.Round(waited.TotalSeconds));
        var summary = firstEventReceived
            ? $"{phaseLabel}: 모델 응답이 길어져 계속 기다리는 중입니다... ({seconds}초)"
            : $"{phaseLabel}: 모델 첫 응답을 기다리는 중입니다... ({seconds}초)";
        LogService.Info($"[AgentLoopWait] {summary}");
        _emitEvent(AgentEventType.Thinking, "", summary);
    }

    /// <summary>
    /// Builds a <see cref="JsonElement"/> for <c>{}</c> that stays valid after its source document is disposed.
    /// </summary>
    private static JsonElement CreateEmptyJsonObject()
    {
        using var document = JsonDocument.Parse("{}");
        return document.RootElement.Clone();
    }
}
|