using System.Globalization;
using System.IO;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;

namespace AxCopilot.Services.Agent;

/// <summary>
/// Tool that groups, pivots, and aggregates CSV/JSON data.
/// Pure C# implementation built on LINQ; no external dependencies.
/// </summary>
public class DataPivotTool : IAgentTool
{
    /// <summary>Separator placed between group-key parts; a control character that is unlikely to occur in cell text.</summary>
    private const char GroupKeySeparator = '\u001F';

    /// <summary>Matches one filter condition: column, operator, value.</summary>
    private static readonly Regex ConditionPattern = new(
        @"(\w+)\s*(==|!=|>=|<=|>|<|contains)\s*(.+)",
        RegexOptions.Compiled);

    public string Name => "data_pivot";

    public string Description =>
        "Group, pivot, and aggregate CSV/JSON data without external dependencies. " +
        "Supports: group_by columns, aggregate functions (sum/avg/count/min/max), " +
        "filter conditions, sorting, and output as table/csv/json.";

    public ToolParameterSchema Parameters => new()
    {
        Properties = new()
        {
            ["source_path"] = new()
            {
                Type = "string",
                Description = "Path to CSV or JSON data file."
            },
            ["group_by"] = new()
            {
                Type = "array",
                Description = "Column names to group by.",
                Items = new() { Type = "string" }
            },
            ["aggregates"] = new()
            {
                Type = "array",
                Description = "Aggregation specs: [{\"column\": \"sales\", \"function\": \"sum\"}, ...]. " +
                              "Functions: sum, avg, count, min, max.",
                Items = new() { Type = "object" }
            },
            ["filter"] = new()
            {
                Type = "string",
                Description = "Optional filter expression: 'column == value' or 'column > 100'. " +
                              "Supports: ==, !=, >, <, >=, <=, contains. " +
                              "Multiple conditions: 'region == Seoul AND year >= 2025'."
            },
            ["sort_by"] = new()
            {
                Type = "string",
                Description = "Column name to sort results by. Prefix with '-' for descending."
            },
            ["top_n"] = new()
            {
                Type = "integer",
                Description = "Limit results to top N rows. Default: all rows."
            },
            ["output_format"] = new()
            {
                Type = "string",
                Description = "Output format: table (markdown), csv, json. Default: table",
                Enum = ["table", "csv", "json"]
            },
        },
        Required = ["source_path"]
    };

    /// <summary>
    /// Loads the file named by <c>source_path</c>, then applies the optional filter,
    /// grouping/aggregation, sorting and top-N steps in that order and formats the result.
    /// </summary>
    /// <param name="args">JSON object holding the tool arguments described by <see cref="Parameters"/>.</param>
    /// <param name="context">Agent context used to resolve the path and check access.</param>
    /// <param name="ct">Cancellation token (not observed; the work is synchronous).</param>
    /// <returns>A completed task holding a success result with the formatted data, or a failure result with the reason.</returns>
    public Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
    {
        // Read the required argument defensively: a missing or non-string value must
        // become a failure result instead of an exception escaping this method.
        var sourcePath = args.ValueKind == JsonValueKind.Object
                         && args.TryGetProperty("source_path", out var sourceEl)
                         && sourceEl.ValueKind == JsonValueKind.String
            ? sourceEl.GetString() ?? ""
            : "";
        if (string.IsNullOrWhiteSpace(sourcePath))
        {
            return Task.FromResult(ToolResult.Fail("source_path 파라미터가 필요합니다."));
        }

        var fullPath = FileReadTool.ResolvePath(sourcePath, context.WorkFolder);
        if (!context.IsPathAllowed(fullPath))
        {
            return Task.FromResult(ToolResult.Fail($"경로 접근 차단: {fullPath}"));
        }

        if (!File.Exists(fullPath))
        {
            return Task.FromResult(ToolResult.Fail($"파일 없음: {fullPath}"));
        }

        try
        {
            // Load data: ".json" goes through the JSON loader, everything else is treated as CSV.
            var ext = Path.GetExtension(fullPath).ToLowerInvariant();
            var data = ext == ".json" ? LoadJson(fullPath) : LoadCsv(fullPath);

            if (data.Count == 0)
            {
                return Task.FromResult(ToolResult.Fail("데이터가 비어있습니다."));
            }

            var originalCount = data.Count;

            // Apply filter
            if (args.TryGetProperty("filter", out var filterEl))
            {
                var filterStr = filterEl.GetString() ?? "";
                if (!string.IsNullOrWhiteSpace(filterStr))
                {
                    data = ApplyFilter(data, filterStr);
                }
            }

            // Group & aggregate (only when group_by is supplied as an array)
            List<Dictionary<string, string>> result;
            if (args.TryGetProperty("group_by", out var groupEl) && groupEl.ValueKind == JsonValueKind.Array)
            {
                var groupCols = new List<string>();
                foreach (var g in groupEl.EnumerateArray())
                {
                    groupCols.Add(g.GetString() ?? "");
                }

                result = GroupAndAggregate(data, groupCols, ReadAggregates(args));
            }
            else
            {
                result = data;
            }

            // Sort
            if (args.TryGetProperty("sort_by", out var sortEl))
            {
                var sortBy = sortEl.GetString() ?? "";
                if (!string.IsNullOrWhiteSpace(sortBy))
                {
                    result = ApplySort(result, sortBy);
                }
            }

            // Top N. The kind check matters: TryGetInt32 throws on non-number elements
            // (for example an explicit JSON null), which would fail the whole call.
            if (args.TryGetProperty("top_n", out var topEl)
                && topEl.ValueKind == JsonValueKind.Number
                && topEl.TryGetInt32(out var topN)
                && topN > 0)
            {
                result = result.Take(topN).ToList();
            }

            // Output format
            var outputFormat = args.TryGetProperty("output_format", out var ofmt)
                ? ofmt.GetString() ?? "table"
                : "table";
            var output = FormatOutput(result, outputFormat);

            return Task.FromResult(ToolResult.Ok(
                $"📊 데이터 피벗 완료: {originalCount}행 → 필터 후 {data.Count}행 → 결과 {result.Count}행\n\n{output}"));
        }
        catch (Exception ex)
        {
            return Task.FromResult(ToolResult.Fail($"데이터 피벗 실패: {ex.Message}"));
        }
    }

    /// <summary>
    /// Reads the optional <c>aggregates</c> array into (column, function) pairs.
    /// Entries that are not objects or have no column name are skipped; a missing function defaults to "count".
    /// </summary>
    private static List<(string Column, string Function)> ReadAggregates(JsonElement args)
    {
        var aggregates = new List<(string Column, string Function)>();
        if (!args.TryGetProperty("aggregates", out var aggEl) || aggEl.ValueKind != JsonValueKind.Array)
        {
            return aggregates;
        }

        foreach (var agg in aggEl.EnumerateArray())
        {
            // TryGetProperty throws on non-object elements, so skip them explicitly.
            if (agg.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var col = agg.TryGetProperty("column", out var c) ? c.GetString() ?? "" : "";
            var func = agg.TryGetProperty("function", out var f) ? f.GetString() ?? "count" : "count";
            if (!string.IsNullOrEmpty(col))
            {
                aggregates.Add((col, func));
            }
        }

        return aggregates;
    }

    /// <summary>
    /// Parses numeric cell text with the invariant culture so that results do not
    /// depend on the machine's regional settings.
    /// </summary>
    private static bool TryParseNumber(string? text, out double value) =>
        double.TryParse(
            text,
            NumberStyles.Float | NumberStyles.AllowThousands,
            CultureInfo.InvariantCulture,
            out value);

    /// <summary>
    /// Loads a CSV file into rows keyed by header name (case-insensitive).
    /// The first line is the header; blank lines are skipped. Returns an empty list
    /// when the file has fewer than two lines.
    /// </summary>
    private static List<Dictionary<string, string>> LoadCsv(string path)
    {
        var read = TextFileCodec.ReadAllText(path);
        var lines = TextFileCodec.SplitLines(read.Text);
        if (lines.Length < 2)
        {
            return new();
        }

        var headers = ParseCsvLine(lines[0]);
        var data = new List<Dictionary<string, string>>();

        for (int i = 1; i < lines.Length; i++)
        {
            if (string.IsNullOrWhiteSpace(lines[i]))
            {
                continue;
            }

            var values = ParseCsvLine(lines[i]);
            var row = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

            // Short rows simply leave the trailing columns absent; extra values are ignored.
            for (int j = 0; j < headers.Count && j < values.Count; j++)
            {
                row[headers[j]] = values[j];
            }

            data.Add(row);
        }

        return data;
    }

    /// <summary>
    /// Splits one CSV line into trimmed fields. Commas inside double quotes do not split,
    /// and a doubled quote inside a quoted field yields one literal quote character.
    /// Quoted fields spanning several lines are not supported because input arrives line by line.
    /// </summary>
    private static List<string> ParseCsvLine(string line)
    {
        var result = new List<string>();
        var sb = new StringBuilder();
        bool inQuote = false;

        for (int i = 0; i < line.Length; i++)
        {
            var c = line[i];

            if (c == '"')
            {
                if (inQuote && i + 1 < line.Length && line[i + 1] == '"')
                {
                    // Escaped quote ("") inside a quoted field.
                    sb.Append('"');
                    i++;
                }
                else
                {
                    inQuote = !inQuote;
                }

                continue;
            }

            if (c == ',' && !inQuote)
            {
                result.Add(sb.ToString().Trim());
                sb.Clear();
                continue;
            }

            sb.Append(c);
        }

        result.Add(sb.ToString().Trim());
        return result;
    }

    /// <summary>
    /// Loads a JSON file into rows keyed by property name (case-insensitive).
    /// Accepts either a root array or an object whose <c>data</c> property is an array;
    /// anything else yields an empty list. Array items that are not objects are skipped.
    /// </summary>
    private static List<Dictionary<string, string>> LoadJson(string path)
    {
        var json = TextFileCodec.ReadAllText(path).Text;

        // All values are copied into strings below, so the document can be released on exit.
        using var doc = JsonDocument.Parse(json);
        var root = doc.RootElement;
        var data = new List<Dictionary<string, string>>();

        JsonElement arr;
        if (root.ValueKind == JsonValueKind.Array)
        {
            arr = root;
        }
        else if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("data", out var d))
        {
            arr = d;
        }
        else
        {
            arr = root;
        }

        if (arr.ValueKind != JsonValueKind.Array)
        {
            return data;
        }

        foreach (var item in arr.EnumerateArray())
        {
            // EnumerateObject throws on non-object elements.
            if (item.ValueKind != JsonValueKind.Object)
            {
                continue;
            }

            var row = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
            foreach (var prop in item.EnumerateObject())
            {
                row[prop.Name] = prop.Value.ToString();
            }

            data.Add(row);
        }

        return data;
    }

    /// <summary>
    /// Keeps the rows that satisfy every condition in <paramref name="filter"/>.
    /// Conditions are separated by " AND " / " and "; each has the form "column op value".
    /// Rows lacking the column never match.
    /// </summary>
    /// <exception cref="FormatException">A non-empty condition cannot be parsed.</exception>
    private static List<Dictionary<string, string>> ApplyFilter(
        List<Dictionary<string, string>> data, string filter)
    {
        var conditions = filter.Split(
            new[] { " AND ", " and " },
            StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries);
        var result = data;

        foreach (var cond in conditions)
        {
            var match = ConditionPattern.Match(cond);
            if (!match.Success)
            {
                // Silently ignoring a condition would return unfiltered rows as if filtered.
                throw new FormatException($"필터 조건을 해석할 수 없습니다: '{cond}'");
            }

            var col = match.Groups[1].Value;
            var op = match.Groups[2].Value;
            var val = match.Groups[3].Value.Trim().Trim('\'', '"');

            // The right-hand side is constant per condition, so parse it once.
            double? threshold = TryParseNumber(val, out var parsed) ? parsed : null;

            result = result
                .Where(row => row.TryGetValue(col, out var cellVal)
                              && EvaluateCondition(cellVal, op, val, threshold))
                .ToList();
        }

        return result;
    }

    /// <summary>
    /// Evaluates one condition against a cell. Equality and "contains" compare text
    /// case-insensitively; ordering operators require both sides to be numeric and are false otherwise.
    /// </summary>
    private static bool EvaluateCondition(string cell, string op, string expected, double? expectedNumber)
    {
        switch (op)
        {
            case "==":
                return cell.Equals(expected, StringComparison.OrdinalIgnoreCase);
            case "!=":
                return !cell.Equals(expected, StringComparison.OrdinalIgnoreCase);
            case "contains":
                return cell.Contains(expected, StringComparison.OrdinalIgnoreCase);
        }

        if (!expectedNumber.HasValue || !TryParseNumber(cell, out var lhs))
        {
            return false;
        }

        var rhs = expectedNumber.Value;
        return op switch
        {
            ">" => lhs > rhs,
            "<" => lhs < rhs,
            ">=" => lhs >= rhs,
            "<=" => lhs <= rhs,
            _ => true
        };
    }

    /// <summary>
    /// Groups rows by the values of <paramref name="groupCols"/> and computes each requested aggregate.
    /// Output rows hold the group columns plus one "<c>column_function</c>" entry per aggregate,
    /// or a single "count" entry when no aggregates are requested. Non-numeric cells are
    /// ignored by sum/avg/min/max; "count" counts all rows in the group.
    /// </summary>
    /// <exception cref="ArgumentException">An aggregate names an unsupported function.</exception>
    private static List<Dictionary<string, string>> GroupAndAggregate(
        List<Dictionary<string, string>> data,
        List<string> groupCols,
        List<(string Column, string Function)> aggregates)
    {
        var groups = data.GroupBy(row => string.Join(
            GroupKeySeparator,
            groupCols.Select(col => row.TryGetValue(col, out var val) ? val : "")));

        var result = new List<Dictionary<string, string>>();
        foreach (var group in groups)
        {
            var row = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

            // Group key columns
            var first = group.First();
            foreach (var col in groupCols)
            {
                row[col] = first.TryGetValue(col, out var v) ? v : "";
            }

            // Aggregate columns
            foreach (var (aggCol, func) in aggregates)
            {
                var normalized = func.Trim().ToLowerInvariant();

                var values = new List<double>();
                foreach (var member in group)
                {
                    if (member.TryGetValue(aggCol, out var raw) && TryParseNumber(raw, out var number))
                    {
                        values.Add(number);
                    }
                }

                double aggResult = normalized switch
                {
                    "sum" => values.Sum(),
                    "avg" or "average" => values.Count > 0 ? values.Average() : 0,
                    "min" => values.Count > 0 ? values.Min() : 0,
                    "max" => values.Count > 0 ? values.Max() : 0,
                    "count" => group.Count(),
                    // Falling back to a count here would publish a wrong number under the requested label.
                    _ => throw new ArgumentException($"지원하지 않는 집계 함수: {func}")
                };

                var label = $"{aggCol}_{func}";
                row[label] = normalized == "count"
                    ? ((int)aggResult).ToString(CultureInfo.InvariantCulture)
                    : aggResult.ToString("F2", CultureInfo.InvariantCulture);
            }

            // Add a default count when no aggregates were requested
            if (aggregates.Count == 0)
            {
                row["count"] = group.Count().ToString(CultureInfo.InvariantCulture);
            }

            result.Add(row);
        }

        return result;
    }

    /// <summary>
    /// Sorts rows by one column; a leading '-' requests descending order.
    /// Numeric cells compare numerically and sort before non-numeric cells in ascending order.
    /// </summary>
    private static List<Dictionary<string, string>> ApplySort(
        List<Dictionary<string, string>> data, string sortBy)
    {
        bool desc = sortBy.StartsWith('-');
        var col = sortBy.TrimStart('-');

        var ordered = desc
            ? data.OrderByDescending(r => GetSortKey(r, col), SortKeyComparer.Instance)
            : data.OrderBy(r => GetSortKey(r, col), SortKeyComparer.Instance);
        return ordered.ToList();
    }

    /// <summary>
    /// Returns the sort key for a row: a boxed <see cref="double"/> when the cell is numeric,
    /// the cell text otherwise, or an empty string when the column is absent.
    /// </summary>
    private static object GetSortKey(Dictionary<string, string> row, string col)
    {
        if (!row.TryGetValue(col, out var val))
        {
            return "";
        }

        if (TryParseNumber(val, out var num))
        {
            return num;
        }

        return val;
    }

    /// <summary>
    /// Orders the mixed double/string keys produced by <see cref="GetSortKey"/>.
    /// The default comparer throws when asked to compare a double with a string,
    /// which happens as soon as a column holds both numeric and non-numeric cells.
    /// </summary>
    private sealed class SortKeyComparer : IComparer<object>
    {
        public static readonly SortKeyComparer Instance = new();

        public int Compare(object? x, object? y) => (x, y) switch
        {
            (double a, double b) => a.CompareTo(b),
            (double, _) => -1,
            (_, double) => 1,
            _ => StringComparer.CurrentCulture.Compare(x as string, y as string),
        };
    }

    /// <summary>
    /// Renders rows as indented JSON, CSV, or (by default) a markdown table.
    /// Columns are the union of all row keys in first-seen order.
    /// </summary>
    private static string FormatOutput(List<Dictionary<string, string>> data, string format)
    {
        if (data.Count == 0)
        {
            return "(결과 없음)";
        }

        // Rows are case-insensitive dictionaries, so de-duplicate column names the same way.
        var columns = data.SelectMany(r => r.Keys).Distinct(StringComparer.OrdinalIgnoreCase).ToList();

        switch (format.Trim().ToLowerInvariant())
        {
            case "json":
                return JsonSerializer.Serialize(data, new JsonSerializerOptions
                {
                    WriteIndented = true,
                    Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
                });

            case "csv":
                var csvSb = new StringBuilder();
                csvSb.AppendLine(string.Join(",", columns.Select(CsvHeader)));
                foreach (var row in data)
                {
                    var vals = columns.Select(c => CsvQuote(row.TryGetValue(c, out var v) ? v : ""));
                    csvSb.AppendLine(string.Join(",", vals));
                }

                return csvSb.ToString();

            default: // table (markdown)
                var sb = new StringBuilder();

                // Header
                sb.AppendLine("| " + string.Join(" | ", columns.Select(MarkdownCell)) + " |");
                sb.AppendLine("| " + string.Join(" | ", columns.Select(_ => "---")) + " |");

                // Rows
                foreach (var row in data)
                {
                    var vals = columns.Select(c => MarkdownCell(row.TryGetValue(c, out var v) ? v : ""));
                    sb.AppendLine("| " + string.Join(" | ", vals) + " |");
                }

                return sb.ToString();
        }
    }

    /// <summary>Wraps a CSV value in double quotes, doubling any embedded quote.</summary>
    private static string CsvQuote(string value) =>
        "\"" + value.Replace("\"", "\"\"") + "\"";

    /// <summary>Leaves plain header names bare and quotes those containing a comma, quote or line break.</summary>
    private static string CsvHeader(string name) =>
        name.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0 ? CsvQuote(name) : name;

    /// <summary>Keeps a value inside one markdown table cell by escaping pipes and flattening line breaks.</summary>
    private static string MarkdownCell(string value) =>
        value.Replace("|", "\\|").Replace("\r\n", " ").Replace('\n', ' ').Replace('\r', ' ');
}