using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;

namespace AxCopilot.Services.Agent;

/// <summary>
/// Agent tool that prepares one image (or two, for the "compare" task) for multimodal LLM analysis.
/// The tool does not call a model itself: it validates the file(s), base64-encodes them, and returns
/// a text payload containing <c>[IMAGE_BASE64:mime]…[/IMAGE_BASE64]</c> markers followed by the
/// analysis prompt.
/// </summary>
public class ImageAnalyzeTool : IAgentTool
{
    /// <summary>Largest image file accepted, in bytes (10 MiB). Applies to both images.</summary>
    private const long MaxImageBytes = 10L * 1024 * 1024;

    /// <summary>Prefix put in front of failure messages that concern the comparison image.</summary>
    private const string CompareLabel = "[compare_path] ";

    public string Name => "image_analyze";

    public string Description => "Analyze an image using LLM multimodal vision. Tasks: describe (general description), extract_text (OCR-like text extraction), extract_data (extract structured data like tables/charts from image), compare (compare two images and describe differences).";

    /// <summary>
    /// JSON schema of the tool arguments. Only <c>image_path</c> is required. A fresh schema
    /// instance is built on every access.
    /// </summary>
    public ToolParameterSchema Parameters
    {
        get
        {
            return new ToolParameterSchema
            {
                Properties = new Dictionary<string, ToolProperty>
                {
                    ["image_path"] = new ToolProperty
                    {
                        Type = "string",
                        Description = "Path to the image file (.png, .jpg, .jpeg, .bmp, .gif, .webp)."
                    },
                    ["task"] = new ToolProperty
                    {
                        Type = "string",
                        Description = "Analysis task: describe, extract_text, extract_data, compare. Default: describe",
                        Enum = new List<string> { "describe", "extract_text", "extract_data", "compare" }
                    },
                    ["compare_path"] = new ToolProperty
                    {
                        Type = "string",
                        Description = "Path to second image for comparison (only used with task=compare)."
                    },
                    ["question"] = new ToolProperty
                    {
                        Type = "string",
                        Description = "Optional specific question about the image."
                    },
                    ["language"] = new ToolProperty
                    {
                        Type = "string",
                        Description = "Response language: ko (Korean), en (English). Default: ko"
                    }
                },
                Required = new List<string> { "image_path" }
            };
        }
    }

    /// <summary>
    /// Validates and encodes the requested image(s) and returns the payload for the model.
    /// </summary>
    /// <param name="args">Tool arguments; see <see cref="Parameters"/> for the accepted keys.</param>
    /// <param name="context">Supplies the work folder used to resolve paths and the path allow-check.</param>
    /// <param name="ct">Cancels the file reads.</param>
    /// <returns>
    /// A successful result carrying file info, the base64 image marker(s) and the prompt, or a
    /// failure result when an argument is missing, a path is not allowed, a file does not exist,
    /// the extension is unsupported, or a file exceeds the size limit.
    /// </returns>
    public async Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
    {
        string imagePath = ReadString(args, "image_path", "");
        if (string.IsNullOrWhiteSpace(imagePath))
        {
            return ToolResult.Fail("image_path가 필요합니다.");
        }

        string task = ReadString(args, "task", "describe");
        string question = ReadString(args, "question", "");
        string language = ReadString(args, "language", "ko");

        string fullPath = FileReadTool.ResolvePath(imagePath, context.WorkFolder);
        var primary = await LoadImageAsync(fullPath, context, "", ct);
        if (primary.Error != null)
        {
            return ToolResult.Fail(primary.Error);
        }

        string compareBase64 = null;
        string compareMime = null;
        if (task == "compare")
        {
            // A comparison needs a second image. Fail loudly instead of sending a
            // "compare two images" prompt with only one image attached.
            string compareArg = ReadString(args, "compare_path", "");
            if (string.IsNullOrWhiteSpace(compareArg))
            {
                return ToolResult.Fail("compare 작업에는 compare_path가 필요합니다.");
            }

            string comparePath = FileReadTool.ResolvePath(compareArg, context.WorkFolder);
            var secondary = await LoadImageAsync(comparePath, context, CompareLabel, ct);
            if (secondary.Error != null)
            {
                return ToolResult.Fail(secondary.Error);
            }

            compareBase64 = secondary.Base64;
            compareMime = secondary.MimeType;
        }

        string prompt = BuildPrompt(task, question, language);

        var info = new StringBuilder();
        info.AppendLine("\ud83d\uddbc 이미지 분석 준비 완료");
        info.AppendLine($" 파일: {Path.GetFileName(fullPath)}");
        info.AppendLine($" 크기: {primary.ByteCount / 1024}KB");
        info.AppendLine($" 형식: {primary.MimeType}");
        info.AppendLine($" 작업: {task}");
        info.AppendLine();
        info.AppendLine($"[IMAGE_BASE64:{primary.MimeType}]{primary.Base64}[/IMAGE_BASE64]");
        if (compareBase64 != null)
        {
            info.AppendLine($"[IMAGE_BASE64:{compareMime}]{compareBase64}[/IMAGE_BASE64]");
        }
        info.AppendLine();
        info.AppendLine($"분석 프롬프트: {prompt}");
        return ToolResult.Ok(info.ToString());
    }

    /// <summary>
    /// Reads an optional string argument. Returns <paramref name="fallback"/> when the key is
    /// absent, the value is JSON null, or the value is not a JSON string (instead of throwing).
    /// </summary>
    private static string ReadString(JsonElement args, string name, string fallback)
    {
        // TryGetProperty throws on non-object elements, so check the kind first.
        if (args.ValueKind == JsonValueKind.Object
            && args.TryGetProperty(name, out JsonElement value)
            && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString() ?? fallback;
        }

        return fallback;
    }

    /// <summary>
    /// Runs the checks shared by both images (allowed path, existence, supported extension,
    /// size limit) and, when they pass, reads and base64-encodes the file.
    /// </summary>
    /// <param name="fullPath">Already-resolved path of the image.</param>
    /// <param name="context">Provides the path allow-check.</param>
    /// <param name="label">Prefix for failure messages; empty for the primary image.</param>
    /// <param name="ct">Cancels the file read.</param>
    /// <returns>
    /// A tuple whose <c>Error</c> is null on success (the other fields are then populated) or
    /// holds the failure message.
    /// </returns>
    private static async Task<(string Error, string Base64, string MimeType, int ByteCount)> LoadImageAsync(
        string fullPath, AgentContext context, string label, CancellationToken ct)
    {
        // The allow-check runs before any file-system probe so a blocked path
        // does not reveal whether the file exists.
        if (!context.IsPathAllowed(fullPath))
        {
            return (label + "경로 접근 차단: " + fullPath, null, null, 0);
        }

        if (!File.Exists(fullPath))
        {
            return (label + "파일 없음: " + fullPath, null, null, 0);
        }

        string ext = Path.GetExtension(fullPath).ToLowerInvariant();
        if (!IsImageExtension(ext))
        {
            return (label + "지원하지 않는 이미지 형식: " + ext, null, null, 0);
        }

        // Check the size on disk first so an oversized file is never loaded into memory.
        if (new FileInfo(fullPath).Length > MaxImageBytes)
        {
            return (label + "이미지 크기가 10MB를 초과합니다.", null, null, 0);
        }

        byte[] bytes = await File.ReadAllBytesAsync(fullPath, ct);

        // Re-check after reading: the file may have grown since the size probe.
        if (bytes.Length > MaxImageBytes)
        {
            return (label + "이미지 크기가 10MB를 초과합니다.", null, null, 0);
        }

        return (null, Convert.ToBase64String(bytes), GetMimeType(ext), bytes.Length);
    }

    /// <summary>
    /// Builds the analysis prompt for the task. Unknown tasks fall back to the "describe"
    /// behaviour, which uses <paramref name="question"/> when one was given.
    /// </summary>
    private static string BuildPrompt(string task, string question, string language)
    {
        string langPrompt = language == "en" ? "Respond in English." : "한국어로 응답하세요.";
        return task switch
        {
            "extract_text" => "이 이미지에서 모든 텍스트를 추출하세요. 원본 레이아웃을 최대한 유지하세요. " + langPrompt,
            "extract_data" => "이 이미지에서 구조화된 데이터를 추출하세요. 테이블, 차트, 그래프 등의 데이터를 CSV 또는 JSON 형식으로 변환하세요. 차트의 경우 각 항목의 값을 추정하세요. " + langPrompt,
            "compare" => "두 이미지를 비교하고 차이점을 설명하세요. " + langPrompt,
            _ => string.IsNullOrEmpty(question)
                ? "이 이미지의 내용을 상세하게 설명하세요. 주요 요소, 텍스트, 레이아웃, 색상 등을 포함하세요. " + langPrompt
                : question + " " + langPrompt,
        };
    }

    /// <summary>
    /// Maps a lower-case file extension (with leading dot) to its media type.
    /// Anything unrecognised is reported as <c>image/png</c>.
    /// </summary>
    private static string GetMimeType(string ext)
    {
        return ext switch
        {
            ".png" => "image/png",
            ".jpg" or ".jpeg" => "image/jpeg",
            ".gif" => "image/gif",
            ".webp" => "image/webp",
            ".bmp" => "image/bmp",
            _ => "image/png",
        };
    }

    /// <summary>Returns true when the lower-case extension is one of the supported image formats.</summary>
    private static bool IsImageExtension(string ext)
    {
        return ext is ".png" or ".jpg" or ".jpeg" or ".gif" or ".bmp" or ".webp";
    }
}