254 lines
8.2 KiB
C#
254 lines
8.2 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.IO;
|
|
using System.Runtime.InteropServices;
|
|
using System.Text;
|
|
using System.Text.Json;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace AxCopilot.Services.Agent;
|
|
|
|
/// <summary>
/// Agent tool that prepares one image (or two, for the <c>compare</c> task) for
/// multimodal LLM analysis. The tool itself does not call a model: it validates the
/// file(s), embeds them as Base64 inside <c>[IMAGE_BASE64:mime]...[/IMAGE_BASE64]</c>
/// markers and appends the analysis prompt to the returned text.
/// </summary>
public class ImageAnalyzeTool : IAgentTool
{
    /// <summary>Largest accepted image file: 10 MiB (10 * 1024 * 1024 bytes).</summary>
    private const long MaxImageBytes = 10L * 1024 * 1024;

    /// <summary>Failure text returned when an image exceeds <see cref="MaxImageBytes"/>.</summary>
    private const string TooLargeMessage = "이미지 크기가 10MB를 초과합니다.";

    /// <summary>Tool identifier.</summary>
    public string Name => "image_analyze";

    /// <summary>Human/LLM readable summary of the supported tasks.</summary>
    public string Description => "Analyze an image using LLM multimodal vision. Tasks: describe (general description), extract_text (OCR-like text extraction), extract_data (extract structured data like tables/charts from image), compare (compare two images and describe differences).";

    /// <summary>
    /// Argument schema. A fresh instance is built on every access; only
    /// <c>image_path</c> is required.
    /// </summary>
    public ToolParameterSchema Parameters => new ToolParameterSchema
    {
        Properties = new Dictionary<string, ToolProperty>
        {
            ["image_path"] = new ToolProperty
            {
                Type = "string",
                Description = "Path to the image file (.png, .jpg, .jpeg, .bmp, .gif, .webp)."
            },
            ["task"] = new ToolProperty
            {
                Type = "string",
                Description = "Analysis task: describe, extract_text, extract_data, compare. Default: describe",
                Enum = new List<string> { "describe", "extract_text", "extract_data", "compare" }
            },
            ["compare_path"] = new ToolProperty
            {
                Type = "string",
                Description = "Path to second image for comparison (only used with task=compare)."
            },
            ["question"] = new ToolProperty
            {
                Type = "string",
                Description = "Optional specific question about the image."
            },
            ["language"] = new ToolProperty
            {
                Type = "string",
                Description = "Response language: ko (Korean), en (English). Default: ko"
            }
        },
        Required = new List<string> { "image_path" }
    };

    /// <summary>
    /// Validates and loads the requested image(s) and returns a text payload containing
    /// file metadata, the Base64-embedded image data and the analysis prompt.
    /// </summary>
    /// <param name="args">JSON arguments: <c>image_path</c> (required), <c>task</c>,
    /// <c>compare_path</c>, <c>question</c>, <c>language</c>.</param>
    /// <param name="context">Supplies the work folder used for path resolution and the
    /// path access check.</param>
    /// <param name="ct">Cancels the file reads.</param>
    /// <returns>
    /// <c>ToolResult.Ok</c> with the payload, or <c>ToolResult.Fail</c> when an argument is
    /// missing, a path is not allowed, a file does not exist, the extension is not a
    /// supported image type, or a file is larger than 10 MiB.
    /// </returns>
    public async Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
    {
        string imagePath = ReadString(args, "image_path", "");
        if (string.IsNullOrWhiteSpace(imagePath))
        {
            // Report a usable error instead of letting GetProperty/GetString throw.
            return ToolResult.Fail("image_path 파라미터가 필요합니다.");
        }

        string task = ReadString(args, "task", "describe");
        string question = ReadString(args, "question", "");
        string language = ReadString(args, "language", "ko");

        string fullPath = FileReadTool.ResolvePath(imagePath, context.WorkFolder);
        if (!TryValidateImage(fullPath, context, out string error))
        {
            return ToolResult.Fail(error);
        }

        byte[] imageBytes = await File.ReadAllBytesAsync(fullPath, ct);
        if (imageBytes.Length > MaxImageBytes)
        {
            // The file may have grown between the metadata check and the read.
            return ToolResult.Fail(TooLargeMessage);
        }

        string mimeType = GetMimeType(Path.GetExtension(fullPath).ToLowerInvariant());

        // Optional second image. When task=compare but no compare_path is supplied the
        // request still proceeds with a single image, as before.
        bool hasCompare = false;
        byte[] compareBytes = Array.Empty<byte>();
        string compareMime = "";
        if (task == "compare")
        {
            string compareArg = ReadString(args, "compare_path", "");
            if (!string.IsNullOrWhiteSpace(compareArg))
            {
                string comparePath = FileReadTool.ResolvePath(compareArg, context.WorkFolder);

                // The second image is held to the same rules as the first; a bad path is
                // reported rather than silently dropped from the comparison.
                if (!TryValidateImage(comparePath, context, out string compareError))
                {
                    return ToolResult.Fail(compareError);
                }

                compareBytes = await File.ReadAllBytesAsync(comparePath, ct);
                if (compareBytes.Length > MaxImageBytes)
                {
                    return ToolResult.Fail(TooLargeMessage);
                }

                compareMime = GetMimeType(Path.GetExtension(comparePath).ToLowerInvariant());
                hasCompare = true;
            }
        }

        string prompt = BuildPrompt(task, question, language);

        StringBuilder info = new StringBuilder();
        info.AppendLine("\ud83d\uddbc 이미지 분석 준비 완료");
        info.AppendLine($" 파일: {Path.GetFileName(fullPath)}");
        info.AppendLine($" 크기: {imageBytes.Length / 1024}KB");
        info.AppendLine($" 형식: {mimeType}");
        info.AppendLine($" 작업: {task}");
        info.AppendLine();
        AppendImageTag(info, mimeType, imageBytes);
        if (hasCompare)
        {
            AppendImageTag(info, compareMime, compareBytes);
        }
        info.AppendLine();
        info.AppendLine($"분석 프롬프트: {prompt}");
        return ToolResult.Ok(info.ToString());
    }

    /// <summary>
    /// Reads an optional string argument. Returns <paramref name="fallback"/> when
    /// <paramref name="args"/> is not a JSON object, the property is absent, or its value
    /// is not a JSON string (including JSON null).
    /// </summary>
    private static string ReadString(JsonElement args, string name, string fallback)
    {
        if (args.ValueKind == JsonValueKind.Object
            && args.TryGetProperty(name, out JsonElement value)
            && value.ValueKind == JsonValueKind.String)
        {
            return value.GetString() ?? fallback;
        }

        return fallback;
    }

    /// <summary>
    /// Checks, in order, that the path is allowed by <paramref name="context"/>, the file
    /// exists, the extension is a supported image type, and the file is no larger than
    /// <see cref="MaxImageBytes"/>. The size is taken from file metadata so an oversized
    /// file is rejected without being loaded into memory.
    /// </summary>
    /// <param name="fullPath">Resolved path of the image file.</param>
    /// <param name="context">Provides the path access check.</param>
    /// <param name="error">Failure message when the method returns <c>false</c>; empty otherwise.</param>
    /// <returns><c>true</c> when every check passes.</returns>
    private static bool TryValidateImage(string fullPath, AgentContext context, out string error)
    {
        if (!context.IsPathAllowed(fullPath))
        {
            error = "경로 접근 차단: " + fullPath;
            return false;
        }

        if (!File.Exists(fullPath))
        {
            error = "파일 없음: " + fullPath;
            return false;
        }

        string ext = Path.GetExtension(fullPath).ToLowerInvariant();
        if (!IsImageExtension(ext))
        {
            error = "지원하지 않는 이미지 형식: " + ext;
            return false;
        }

        if (new FileInfo(fullPath).Length > MaxImageBytes)
        {
            error = TooLargeMessage;
            return false;
        }

        error = "";
        return true;
    }

    /// <summary>
    /// Maps a lower-case file extension (with leading dot) to its MIME type. Unknown
    /// extensions fall back to <c>image/png</c>.
    /// </summary>
    private static string GetMimeType(string ext) => ext switch
    {
        ".png" => "image/png",
        ".jpg" or ".jpeg" => "image/jpeg",
        ".gif" => "image/gif",
        ".webp" => "image/webp",
        ".bmp" => "image/bmp",
        _ => "image/png",
    };

    /// <summary>
    /// Builds the analysis prompt for <paramref name="task"/>. Any language other than
    /// <c>en</c> yields the Korean response instruction. <paramref name="question"/> is
    /// only used when the task is not extract_text, extract_data or compare.
    /// </summary>
    private static string BuildPrompt(string task, string question, string language)
    {
        string langPrompt = language == "en" ? "Respond in English." : "한국어로 응답하세요.";
        return task switch
        {
            "extract_text" => "이 이미지에서 모든 텍스트를 추출하세요. 원본 레이아웃을 최대한 유지하세요. " + langPrompt,
            "extract_data" => "이 이미지에서 구조화된 데이터를 추출하세요. 테이블, 차트, 그래프 등의 데이터를 CSV 또는 JSON 형식으로 변환하세요. 차트의 경우 각 항목의 값을 추정하세요. " + langPrompt,
            "compare" => "두 이미지를 비교하고 차이점을 설명하세요. " + langPrompt,
            _ => string.IsNullOrEmpty(question)
                ? "이 이미지의 내용을 상세하게 설명하세요. 주요 요소, 텍스트, 레이아웃, 색상 등을 포함하세요. " + langPrompt
                : question + " " + langPrompt,
        };
    }

    /// <summary>
    /// Appends one <c>[IMAGE_BASE64:mime]data[/IMAGE_BASE64]</c> line to
    /// <paramref name="builder"/>. Written piecewise so the Base64 text is not copied
    /// into an additional intermediate string.
    /// </summary>
    private static void AppendImageTag(StringBuilder builder, string mimeType, byte[] bytes)
    {
        builder.Append("[IMAGE_BASE64:")
            .Append(mimeType)
            .Append("]")
            .Append(Convert.ToBase64String(bytes))
            .AppendLine("[/IMAGE_BASE64]");
    }

    /// <summary>
    /// Returns <c>true</c> for the supported image extensions. The comparison is
    /// case-sensitive; callers pass a lower-cased extension including the leading dot.
    /// </summary>
    private static bool IsImageExtension(string ext)
    {
        return ext is ".png" or ".jpg" or ".jpeg" or ".gif" or ".bmp" or ".webp";
    }
}
|