Files

254 lines
8.2 KiB
C#

using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
using System.Text.Json;
using System.Threading;
using System.Threading.Tasks;
namespace AxCopilot.Services.Agent;
/// <summary>
/// Agent tool that prepares one or two image files for multimodal (vision) analysis.
/// </summary>
/// <remarks>
/// The tool does not call a model itself. It validates the requested file(s), base64-encodes
/// them, and returns a text payload that contains <c>[IMAGE_BASE64:mime]...[/IMAGE_BASE64]</c>
/// markers followed by the analysis prompt.
/// </remarks>
public class ImageAnalyzeTool : IAgentTool
{
    /// <summary>Largest image file, in bytes, that will be read and embedded (10 MiB).</summary>
    private const long MaxImageBytes = 10L * 1024 * 1024;

    /// <summary>Tool identifier.</summary>
    public string Name => "image_analyze";

    /// <summary>Human/LLM readable description of the supported tasks.</summary>
    public string Description => "Analyze an image using LLM multimodal vision. Tasks: describe (general description), extract_text (OCR-like text extraction), extract_data (extract structured data like tables/charts from image), compare (compare two images and describe differences).";

    /// <summary>
    /// Parameter schema: <c>image_path</c> (required), plus optional <c>task</c>,
    /// <c>compare_path</c>, <c>question</c> and <c>language</c>.
    /// </summary>
    public ToolParameterSchema Parameters => new ToolParameterSchema
    {
        Properties = new Dictionary<string, ToolProperty>
        {
            ["image_path"] = new ToolProperty
            {
                Type = "string",
                Description = "Path to the image file (.png, .jpg, .jpeg, .bmp, .gif, .webp)."
            },
            ["task"] = new ToolProperty
            {
                Type = "string",
                Description = "Analysis task: describe, extract_text, extract_data, compare. Default: describe",
                Enum = new List<string> { "describe", "extract_text", "extract_data", "compare" }
            },
            ["compare_path"] = new ToolProperty
            {
                Type = "string",
                Description = "Path to second image for comparison (only used with task=compare)."
            },
            ["question"] = new ToolProperty
            {
                Type = "string",
                Description = "Optional specific question about the image."
            },
            ["language"] = new ToolProperty
            {
                Type = "string",
                Description = "Response language: ko (Korean), en (English). Default: ko"
            }
        },
        Required = new List<string> { "image_path" }
    };

    /// <summary>
    /// Validates and loads the requested image(s) and returns the payload text for analysis.
    /// </summary>
    /// <param name="args">JSON arguments matching <see cref="Parameters"/>.</param>
    /// <param name="context">Agent context supplying the work folder and the path allow-check.</param>
    /// <param name="ct">Cancellation token passed to the file reads.</param>
    /// <returns>
    /// A failed result when an argument is missing, a path is not allowed, a file does not
    /// exist, the extension is not a supported image type, or a file exceeds 10 MiB;
    /// otherwise a successful result carrying the summary, the encoded image(s) and the prompt.
    /// </returns>
    public async Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
    {
        // Read arguments defensively: a missing key or a non-string value must become a
        // tool failure (or the documented default), not an exception.
        string imagePath = GetStringOrDefault(args, "image_path", "");
        if (string.IsNullOrWhiteSpace(imagePath))
        {
            return ToolResult.Fail("image_path가 필요합니다.");
        }

        string task = GetStringOrDefault(args, "task", "describe");
        string question = GetStringOrDefault(args, "question", "");
        string language = GetStringOrDefault(args, "language", "ko");

        string fullPath = FileReadTool.ResolvePath(imagePath, context.WorkFolder);
        if (!TryValidateImageFile(fullPath, context, out string imageError))
        {
            return ToolResult.Fail(imageError);
        }

        // Validate the second image before any file is read, so a bad comparison request
        // fails fast instead of silently degrading to a single-image analysis.
        bool isCompare = task == "compare";
        string comparePath = "";
        if (isCompare)
        {
            string compareArg = GetStringOrDefault(args, "compare_path", "");
            if (string.IsNullOrWhiteSpace(compareArg))
            {
                return ToolResult.Fail("compare 작업에는 compare_path가 필요합니다.");
            }

            comparePath = FileReadTool.ResolvePath(compareArg, context.WorkFolder);
            if (!TryValidateImageFile(comparePath, context, out string compareError))
            {
                return ToolResult.Fail(compareError);
            }
        }

        byte[] imageBytes = await File.ReadAllBytesAsync(fullPath, ct);
        string mimeType = GetMimeType(Path.GetExtension(fullPath).ToLowerInvariant());

        StringBuilder info = new StringBuilder();
        info.AppendLine("\ud83d\uddbc 이미지 분석 준비 완료");
        info.AppendLine($" 파일: {Path.GetFileName(fullPath)}");
        info.AppendLine($" 크기: {imageBytes.Length / 1024}KB");
        info.AppendLine($" 형식: {mimeType}");
        info.AppendLine($" 작업: {task}");
        info.AppendLine();
        AppendImagePayload(info, mimeType, Convert.ToBase64String(imageBytes));

        if (isCompare)
        {
            byte[] compareBytes = await File.ReadAllBytesAsync(comparePath, ct);
            string compareMime = GetMimeType(Path.GetExtension(comparePath).ToLowerInvariant());
            AppendImagePayload(info, compareMime, Convert.ToBase64String(compareBytes));
        }

        info.AppendLine();
        info.AppendLine($"분석 프롬프트: {BuildPrompt(task, question, language)}");
        return ToolResult.Ok(info.ToString());
    }

    /// <summary>
    /// Returns the string value of property <paramref name="name"/>, or
    /// <paramref name="fallback"/> when <paramref name="args"/> is not a JSON object or the
    /// property is absent, null, or not a JSON string.
    /// </summary>
    private static string GetStringOrDefault(JsonElement args, string name, string fallback)
    {
        // TryGetProperty throws on non-object elements and GetString throws on non-string
        // values, so both kinds are checked explicitly.
        if (args.ValueKind == JsonValueKind.Object
            && args.TryGetProperty(name, out JsonElement value)
            && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString() ?? fallback;
        }

        return fallback;
    }

    /// <summary>
    /// Checks that <paramref name="fullPath"/> is allowed by the context, exists, has a
    /// supported image extension and does not exceed <see cref="MaxImageBytes"/>.
    /// </summary>
    /// <param name="fullPath">Resolved path of the candidate image file.</param>
    /// <param name="context">Agent context used for the path allow-check.</param>
    /// <param name="error">Failure message when the method returns false; empty otherwise.</param>
    /// <returns>True when the file may be read and embedded.</returns>
    private static bool TryValidateImageFile(string fullPath, AgentContext context, out string error)
    {
        // The allow-check runs first so nothing about a blocked path is probed on disk.
        if (!context.IsPathAllowed(fullPath))
        {
            error = "경로 접근 차단: " + fullPath;
            return false;
        }

        if (!File.Exists(fullPath))
        {
            error = "파일 없음: " + fullPath;
            return false;
        }

        string ext = Path.GetExtension(fullPath).ToLowerInvariant();
        if (!IsImageExtension(ext))
        {
            error = "지원하지 않는 이미지 형식: " + ext;
            return false;
        }

        // The size comes from file metadata so an oversized file is rejected before it is
        // loaded into memory and base64-encoded.
        if (new FileInfo(fullPath).Length > MaxImageBytes)
        {
            error = "이미지 크기가 10MB를 초과합니다.";
            return false;
        }

        error = string.Empty;
        return true;
    }

    /// <summary>Appends one <c>[IMAGE_BASE64:mime]data[/IMAGE_BASE64]</c> line to the payload.</summary>
    private static void AppendImagePayload(StringBuilder info, string mimeType, string base64)
    {
        info.Append("[IMAGE_BASE64:")
            .Append(mimeType)
            .Append(']')
            .Append(base64)
            .AppendLine("[/IMAGE_BASE64]");
    }

    /// <summary>
    /// Builds the analysis prompt for <paramref name="task"/>.
    /// </summary>
    /// <remarks>
    /// <paramref name="question"/> is only used by the describe/default branch, where a
    /// non-empty question replaces the generic description request. Any task value other than
    /// extract_text, extract_data or compare is treated as describe.
    /// </remarks>
    private static string BuildPrompt(string task, string question, string language)
    {
        string langPrompt = (language == "en") ? "Respond in English." : "한국어로 응답하세요.";
        return task switch
        {
            "extract_text" => "이 이미지에서 모든 텍스트를 추출하세요. 원본 레이아웃을 최대한 유지하세요. " + langPrompt,
            "extract_data" => "이 이미지에서 구조화된 데이터를 추출하세요. 테이블, 차트, 그래프 등의 데이터를 CSV 또는 JSON 형식으로 변환하세요. 차트의 경우 각 항목의 값을 추정하세요. " + langPrompt,
            "compare" => "두 이미지를 비교하고 차이점을 설명하세요. " + langPrompt,
            _ => string.IsNullOrEmpty(question)
                ? "이 이미지의 내용을 상세하게 설명하세요. 주요 요소, 텍스트, 레이아웃, 색상 등을 포함하세요. " + langPrompt
                : question + " " + langPrompt,
        };
    }

    /// <summary>
    /// Maps a lower-case file extension (with leading dot) to its MIME type; unknown
    /// extensions map to <c>image/png</c>.
    /// </summary>
    private static string GetMimeType(string ext)
    {
        return ext switch
        {
            ".png" => "image/png",
            ".jpg" or ".jpeg" => "image/jpeg",
            ".gif" => "image/gif",
            ".webp" => "image/webp",
            ".bmp" => "image/bmp",
            _ => "image/png",
        };
    }

    /// <summary>Returns true when the lower-case extension is one of the supported image types.</summary>
    private static bool IsImageExtension(string ext)
    {
        return ext is ".png" or ".jpg" or ".jpeg" or ".gif" or ".bmp" or ".webp";
    }
}