vLLM 도구 호출 스트리밍 실행기와 코워크 루프 실시간 소비 구조 추가
Some checks failed
Release Gate / gate (push) Has been cancelled

- LlmService에 tool-use 전용 streaming event API를 추가하고 OpenAI vLLM IBM 경로의 partial tool_call 조립을 event 기반으로 재구성함
- Cowork/Code 루프가 streaming event를 직접 소비하도록 바꿔 도구 호출 감지와 진행 표시를 더 빠르게 갱신함
- read-only 도구 조기 실행이 기존 loop와 실제로 이어지도록 정리하고 최종 실행에서는 prefetch 결과를 재사용함
- README와 DEVELOPMENT 문서를 2026-04-08 11:31(KST) 기준으로 갱신함

검증
- dotnet build src/AxCopilot/AxCopilot.csproj -c Release -v minimal -p:OutputPath=bin\verify\ -p:IntermediateOutputPath=obj\verify\
- 경고 0 / 오류 0
This commit is contained in:
2026-04-08 16:58:11 +09:00
parent 90ef3400f6
commit 6e99837a4c
5 changed files with 309 additions and 50 deletions

View File

@@ -32,6 +32,18 @@ public partial class LlmService
long ElapsedMilliseconds,
string? ResolvedToolName = null);
/// <summary>Kinds of events emitted while streaming a tool-use LLM response.</summary>
public enum ToolStreamEventKind
{
/// <summary>An incremental chunk of assistant text.</summary>
TextDelta,
/// <summary>A fully assembled tool call is ready to execute.</summary>
ToolCallReady,
/// <summary>The stream has finished; no further events follow.</summary>
Completed
}
/// <summary>A single event produced by the tool-use streaming APIs.</summary>
/// <param name="Kind">What this event carries.</param>
/// <param name="Text">Text delta payload; empty unless <see cref="ToolStreamEventKind.TextDelta"/>.</param>
/// <param name="ToolCall">Assembled tool_use content block; null unless <see cref="ToolStreamEventKind.ToolCallReady"/>.</param>
public sealed record ToolStreamEvent(
ToolStreamEventKind Kind,
string Text = "",
ContentBlock? ToolCall = null);
/// <summary>도구 정의를 포함하여 LLM에 요청하고, 텍스트 + tool_use 블록을 파싱하여 반환합니다.</summary>
/// <param name="forceToolCall">
/// true이면 <c>tool_choice: "required"</c>를 요청에 추가하여 모델이 반드시 도구를 호출하도록 강제합니다.
@@ -56,6 +68,51 @@ public partial class LlmService
};
}
/// <summary>
/// Streams a tool-enabled LLM turn as a sequence of <see cref="ToolStreamEvent"/>s:
/// text deltas, assembled tool calls, then a final <see cref="ToolStreamEventKind.Completed"/>.
/// </summary>
/// <param name="messages">Conversation history to send.</param>
/// <param name="tools">Tool definitions advertised to the model.</param>
/// <param name="forceToolCall">When true, adds <c>tool_choice: "required"</c> so the model must call a tool.</param>
/// <param name="prefetchToolCallAsync">
/// Optional callback that starts executing a tool call as soon as it is assembled;
/// the resulting task is attached to the block via <c>PrefetchedExecutionTask</c> for later reuse.
/// </param>
/// <param name="ct">Cancellation token.</param>
/// <exception cref="NotSupportedException">The active service has no function-calling streaming support.</exception>
public async IAsyncEnumerable<ToolStreamEvent> StreamWithToolsAsync(
    List<ChatMessage> messages,
    IReadOnlyCollection<IAgentTool> tools,
    bool forceToolCall = false,
    Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync = null,
    [EnumeratorCancellation] CancellationToken ct = default)
{
    var activeService = ResolveService();
    EnsureOperationModeAllowsLlmService(activeService);

    switch (NormalizeServiceName(activeService))
    {
        case "ollama":
        case "vllm":
            // Natively streaming path: relay events as they are produced.
            await foreach (var evt in StreamOpenAiToolEventsAsync(messages, tools, ct, forceToolCall, prefetchToolCallAsync).WithCancellation(ct))
                yield return evt;
            yield break;

        case "sigmoid":
            foreach (var evt in ToEvents(await SendSigmoidWithToolsAsync(messages, tools, ct)))
                yield return evt;
            yield break;

        case "gemini":
            foreach (var evt in ToEvents(await SendGeminiWithToolsAsync(messages, tools, ct)))
                yield return evt;
            yield break;

        default:
            throw new NotSupportedException($"서비스 '{activeService}'는 아직 Function Calling 스트리밍을 지원하지 않습니다.");
    }

    // Adapts a non-streaming block list to the streaming event shape.
    IEnumerable<ToolStreamEvent> ToEvents(IEnumerable<ContentBlock> blocks)
    {
        foreach (var block in blocks)
        {
            if (block.Type == "text" && !string.IsNullOrWhiteSpace(block.Text))
            {
                yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, block.Text);
            }
            else if (block.Type == "tool_use")
            {
                // Fix: previously only the OpenAI/vLLM path honored prefetchToolCallAsync.
                // Attach the prefetch task here too so read-only early execution also
                // applies to sigmoid/gemini results.
                if (prefetchToolCallAsync != null)
                    block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
                yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: block);
            }
        }

        yield return new ToolStreamEvent(ToolStreamEventKind.Completed);
    }
}
/// <summary>도구 실행 결과를 LLM에 피드백하기 위한 메시지를 생성합니다.</summary>
public static ChatMessage CreateToolResultMessage(string toolId, string toolName, string result)
{
@@ -909,14 +966,127 @@ public partial class LlmService
bool usesIbmDeploymentApi,
Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync,
CancellationToken ct)
{
var blocks = new List<ContentBlock>();
var textBuilder = new StringBuilder();
await foreach (var evt in StreamOpenAiToolEventsAsync(resp, usesIbmDeploymentApi, prefetchToolCallAsync, ct).WithCancellation(ct))
{
if (evt.Kind == ToolStreamEventKind.TextDelta && !string.IsNullOrWhiteSpace(evt.Text))
{
textBuilder.Append(evt.Text);
}
else if (evt.Kind == ToolStreamEventKind.ToolCallReady && evt.ToolCall != null)
{
blocks.Add(evt.ToolCall);
}
}
var text = textBuilder.ToString().Trim();
var result = new List<ContentBlock>();
if (!string.IsNullOrWhiteSpace(text))
result.Add(new ContentBlock { Type = "text", Text = text });
result.AddRange(blocks);
if (!result.Any(b => b.Type == "tool_use"))
{
var textBlock = result.FirstOrDefault(b => b.Type == "text" && !string.IsNullOrWhiteSpace(b.Text));
if (textBlock != null)
{
var extracted = TryExtractToolCallsFromText(textBlock.Text);
if (extracted.Count > 0)
{
foreach (var block in extracted)
{
if (prefetchToolCallAsync != null)
block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
}
result.AddRange(extracted);
LogService.Debug($"[ToolUse] 텍스트에서 도구 호출 {extracted.Count}건 추출 (SSE 폴백 파싱)");
}
}
}
return result;
}
/// <summary>
/// Builds and sends a tool-enabled chat request to the resolved endpoint
/// (ollama native /api/chat, OpenAI-compatible /v1/chat/completions, or the
/// IBM deployment chat API) and streams the response as <see cref="ToolStreamEvent"/>s.
/// </summary>
/// <param name="messages">Conversation history to send.</param>
/// <param name="tools">Tool definitions advertised to the model.</param>
/// <param name="ct">Cancellation token.</param>
/// <param name="forceToolCall">When true, the request body demands a tool call.</param>
/// <param name="prefetchToolCallAsync">Optional early-execution callback, forwarded to the response parser.</param>
/// <exception cref="ToolCallNotSupportedException">The endpoint rejected the tool-call request (HTTP 400).</exception>
/// <exception cref="HttpRequestException">Any other non-success HTTP status.</exception>
private async IAsyncEnumerable<ToolStreamEvent> StreamOpenAiToolEventsAsync(
    List<ChatMessage> messages,
    IReadOnlyCollection<IAgentTool> tools,
    [EnumeratorCancellation] CancellationToken ct,
    bool forceToolCall = false,
    Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync = null)
{
    var activeService = ResolveService();
    var (resolvedEp, _, allowInsecureTls) = ResolveServerInfo();
    var endpoint = string.IsNullOrEmpty(resolvedEp)
        ? ResolveEndpointForService(activeService)
        : resolvedEp;
    var registered = GetActiveRegisteredModel();
    var isIbmDeployment = UsesIbmDeploymentChatApi(activeService, registered, endpoint);

    var body = isIbmDeployment
        ? BuildIbmToolBody(messages, tools, forceToolCall)
        : BuildOpenAiToolBody(messages, tools, forceToolCall);

    string url;
    if (isIbmDeployment)
        url = BuildIbmDeploymentChatUrl(endpoint, stream: true);
    else if (activeService.Equals("ollama", StringComparison.OrdinalIgnoreCase))
        url = endpoint.TrimEnd('/') + "/api/chat";   // ollama native chat endpoint
    else
        url = endpoint.TrimEnd('/') + "/v1/chat/completions";

    var json = JsonSerializer.Serialize(body);
    using var req = new HttpRequestMessage(HttpMethod.Post, url)
    {
        Content = new StringContent(json, Encoding.UTF8, "application/json")
    };
    await ApplyAuthHeaderAsync(req, ct);
    // ResponseHeadersRead: start consuming the SSE body before it completes.
    using var resp = await SendWithTlsAsync(req, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);

    if (!resp.IsSuccessStatusCode)
    {
        var errBody = await resp.Content.ReadAsStringAsync(ct);
        var detail = ExtractErrorDetail(errBody);

        // Some IBM deployments reject tool_choice; retry once without it.
        if (isIbmDeployment && forceToolCall && (int)resp.StatusCode == 400)
        {
            var fallbackBody = BuildIbmToolBody(messages, tools, forceToolCall: true, useToolChoice: false);
            var fallbackJson = JsonSerializer.Serialize(fallbackBody);
            using var retryReq = new HttpRequestMessage(HttpMethod.Post, url)
            {
                Content = new StringContent(fallbackJson, Encoding.UTF8, "application/json")
            };
            await ApplyAuthHeaderAsync(retryReq, ct);
            using var retryResp = await SendWithTlsAsync(retryReq, allowInsecureTls, ct, HttpCompletionOption.ResponseHeadersRead);
            if (!retryResp.IsSuccessStatusCode)
            {
                // Fix: report the retry response's own error detail rather than the
                // first response's, so the message matches the status code it cites.
                var retryDetail = ExtractErrorDetail(await retryResp.Content.ReadAsStringAsync(ct));
                if (string.IsNullOrWhiteSpace(retryDetail))
                    retryDetail = detail;
                throw new ToolCallNotSupportedException($"{activeService} API 오류 ({retryResp.StatusCode}): {retryDetail}");
            }
            await foreach (var evt in StreamOpenAiToolEventsAsync(retryResp, true, prefetchToolCallAsync, ct).WithCancellation(ct))
                yield return evt;
            yield break;
        }

        // 400 implies the endpoint cannot honor the tool-call request shape.
        if ((int)resp.StatusCode == 400)
            throw new ToolCallNotSupportedException($"{activeService} API 오류 ({resp.StatusCode}): {detail}");
        throw new HttpRequestException($"{activeService} API 오류 ({resp.StatusCode}): {detail}");
    }

    await foreach (var evt in StreamOpenAiToolEventsAsync(resp, isIbmDeployment, prefetchToolCallAsync, ct).WithCancellation(ct))
        yield return evt;
}
private async IAsyncEnumerable<ToolStreamEvent> StreamOpenAiToolEventsAsync(
HttpResponseMessage resp,
bool usesIbmDeploymentApi,
Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync,
[EnumeratorCancellation] CancellationToken ct)
{
using var stream = await resp.Content.ReadAsStreamAsync(ct);
using var reader = new StreamReader(stream);
var firstChunkReceived = false;
var textBuilder = new StringBuilder();
var toolAccumulators = new Dictionary<int, ToolCallAccumulator>();
var emittedTools = new List<ContentBlock>();
var lastIbmGeneratedText = "";
while (!reader.EndOfStream && !ct.IsCancellationRequested)
@@ -952,8 +1122,21 @@ public partial class LlmService
throw new ToolCallNotSupportedException(detail ?? "IBM vLLM 도구 호출 응답 오류");
}
if (TryExtractMessageToolBlocks(root, textBuilder, emittedTools))
if (TryExtractMessageToolBlocks(root, out var messageText, out var directToolBlocks))
{
if (!string.IsNullOrWhiteSpace(messageText))
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, messageText);
if (directToolBlocks.Count > 0)
{
foreach (var toolBlock in directToolBlocks)
{
if (prefetchToolCallAsync != null)
toolBlock.PrefetchedExecutionTask = prefetchToolCallAsync(toolBlock);
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: toolBlock);
}
}
continue;
}
if (usesIbmDeploymentApi &&
root.TryGetProperty("results", out var resultsEl) &&
@@ -970,13 +1153,15 @@ public partial class LlmService
{
if (generatedText.StartsWith(lastIbmGeneratedText, StringComparison.Ordinal))
{
textBuilder.Append(generatedText[lastIbmGeneratedText.Length..]);
var delta = generatedText[lastIbmGeneratedText.Length..];
if (!string.IsNullOrEmpty(delta))
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, delta);
lastIbmGeneratedText = generatedText;
}
else
{
textBuilder.Clear();
textBuilder.Append(generatedText);
if (!string.IsNullOrEmpty(generatedText))
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, generatedText);
lastIbmGeneratedText = generatedText;
}
}
@@ -994,7 +1179,7 @@ public partial class LlmService
{
var chunk = contentEl.GetString();
if (!string.IsNullOrEmpty(chunk))
textBuilder.Append(chunk);
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, chunk);
}
if (deltaEl.TryGetProperty("tool_calls", out var toolCallsEl) &&
@@ -1030,53 +1215,50 @@ public partial class LlmService
}
}
await TryEmitCompletedToolCallAsync(acc, emittedTools, prefetchToolCallAsync).ConfigureAwait(false);
var emittedBlock = await TryCreateCompletedToolCallAsync(acc, prefetchToolCallAsync).ConfigureAwait(false);
if (emittedBlock != null)
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: emittedBlock);
}
}
}
if (firstChoice.TryGetProperty("message", out var messageEl))
TryExtractMessageToolBlocks(messageEl, textBuilder, emittedTools);
}
}
foreach (var acc in toolAccumulators.Values.OrderBy(a => a.Index))
await TryEmitCompletedToolCallAsync(acc, emittedTools, prefetchToolCallAsync, forceEmit: true).ConfigureAwait(false);
var blocks = new List<ContentBlock>();
var text = textBuilder.ToString().Trim();
if (!string.IsNullOrWhiteSpace(text))
blocks.Add(new ContentBlock { Type = "text", Text = text });
blocks.AddRange(emittedTools);
if (!blocks.Any(b => b.Type == "tool_use"))
{
var textBlock = blocks.FirstOrDefault(b => b.Type == "text" && !string.IsNullOrWhiteSpace(b.Text));
if (textBlock != null)
{
var extracted = TryExtractToolCallsFromText(textBlock.Text);
if (extracted.Count > 0)
{
foreach (var block in extracted)
if (TryExtractMessageToolBlocks(messageEl, out var messageText2, out var directToolBlocks2))
{
if (prefetchToolCallAsync != null)
block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
if (!string.IsNullOrWhiteSpace(messageText2))
yield return new ToolStreamEvent(ToolStreamEventKind.TextDelta, messageText2);
if (directToolBlocks2.Count > 0)
{
foreach (var toolBlock in directToolBlocks2)
{
if (prefetchToolCallAsync != null)
toolBlock.PrefetchedExecutionTask = prefetchToolCallAsync(toolBlock);
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: toolBlock);
}
}
}
blocks.AddRange(extracted);
LogService.Debug($"[ToolUse] 텍스트에서 도구 호출 {extracted.Count}건 추출 (SSE 폴백 파싱)");
}
}
}
return blocks;
foreach (var acc in toolAccumulators.Values.OrderBy(a => a.Index))
{
var emittedBlock = await TryCreateCompletedToolCallAsync(acc, prefetchToolCallAsync, forceEmit: true).ConfigureAwait(false);
if (emittedBlock != null)
yield return new ToolStreamEvent(ToolStreamEventKind.ToolCallReady, ToolCall: emittedBlock);
}
yield return new ToolStreamEvent(ToolStreamEventKind.Completed);
}
private static bool TryExtractMessageToolBlocks(
JsonElement messageOrRoot,
StringBuilder textBuilder,
List<ContentBlock> emittedTools)
out string text,
out List<ContentBlock> toolBlocks)
{
text = "";
toolBlocks = new List<ContentBlock>();
JsonElement message = messageOrRoot;
if (messageOrRoot.TryGetProperty("message", out var nestedMessage))
message = nestedMessage;
@@ -1085,10 +1267,10 @@ public partial class LlmService
if (message.TryGetProperty("content", out var contentEl) &&
contentEl.ValueKind == JsonValueKind.String)
{
var text = contentEl.GetString();
if (!string.IsNullOrWhiteSpace(text))
var parsedText = contentEl.GetString();
if (!string.IsNullOrWhiteSpace(parsedText))
{
textBuilder.Append(text);
text = parsedText;
consumed = true;
}
}
@@ -1119,7 +1301,7 @@ public partial class LlmService
}
}
emittedTools.Add(new ContentBlock
toolBlocks.Add(new ContentBlock
{
Type = "tool_use",
ToolName = functionEl.TryGetProperty("name", out var nameEl) ? nameEl.GetString() ?? "" : "",
@@ -1154,21 +1336,20 @@ public partial class LlmService
}
}
private static async Task TryEmitCompletedToolCallAsync(
private static async Task<ContentBlock?> TryCreateCompletedToolCallAsync(
ToolCallAccumulator acc,
List<ContentBlock> emittedTools,
Func<ContentBlock, Task<ToolPrefetchResult?>>? prefetchToolCallAsync,
bool forceEmit = false)
{
if (acc.Emitted || string.IsNullOrWhiteSpace(acc.Name))
return;
return null;
var argsJson = acc.Arguments.ToString().Trim();
JsonElement? parsedArgs = null;
if (!string.IsNullOrEmpty(argsJson))
{
if (!forceEmit && !LooksLikeCompleteJson(argsJson))
return;
return null;
try
{
@@ -1178,7 +1359,7 @@ public partial class LlmService
catch
{
if (!forceEmit)
return;
return null;
}
}
@@ -1193,8 +1374,8 @@ public partial class LlmService
if (prefetchToolCallAsync != null)
block.PrefetchedExecutionTask = prefetchToolCallAsync(block);
emittedTools.Add(block);
acc.Emitted = true;
return block;
}
// ─── 공통 헬퍼 ─────────────────────────────────────────────────────