Files
AX-Copilot-Codex/src/AxCopilot/Services/Agent/DataPivotTool.cs

360 lines
14 KiB
C#

using System.IO;
using System.Text;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace AxCopilot.Services.Agent;
/// <summary>
/// CSV/JSON 데이터를 그룹화, 피벗, 집계하는 도구.
/// LINQ 기반 순수 C# 구현으로 외부 의존성 없음.
/// </summary>
public class DataPivotTool : IAgentTool
{
/// <summary>Tool name string: <c>data_pivot</c>.</summary>
public string Name
{
    get { return "data_pivot"; }
}
/// <summary>
/// English description of the tool: what it groups/aggregates, which aggregate
/// functions it knows, and which output formats it can produce.
/// </summary>
public string Description
{
    get
    {
        return "Group, pivot, and aggregate CSV/JSON data without external dependencies. "
            + "Supports: group_by columns, aggregate functions (sum/avg/count/min/max), "
            + "filter conditions, sorting, and output as table/csv/json.";
    }
}
/// <summary>
/// Argument schema for this tool. A new schema object is built on every access.
/// Only <c>source_path</c> is listed as required; <c>group_by</c>, <c>aggregates</c>,
/// <c>filter</c>, <c>sort_by</c>, <c>top_n</c> and <c>output_format</c> are optional.
/// </summary>
public ToolParameterSchema Parameters => new()
{
Properties = new()
{
// Input file; ExecuteAsync picks the JSON loader for ".json" and the CSV loader otherwise.
["source_path"] = new() { Type = "string", Description = "Path to CSV or JSON data file." },
// Grouping columns, given as an array of column-name strings.
["group_by"] = new()
{
Type = "array",
Description = "Column names to group by.",
Items = new() { Type = "string" }
},
// Aggregation specs, each an object with "column" and "function" keys.
["aggregates"] = new()
{
Type = "array",
Description = "Aggregation specs: [{\"column\": \"sales\", \"function\": \"sum\"}, ...]. " +
"Functions: sum, avg, count, min, max.",
Items = new() { Type = "object" }
},
// Row filter expression; conditions are joined with AND.
["filter"] = new()
{
Type = "string",
Description = "Optional filter expression: 'column == value' or 'column > 100'. " +
"Supports: ==, !=, >, <, >=, <=, contains. " +
"Multiple conditions: 'region == Seoul AND year >= 2025'."
},
["sort_by"] = new() { Type = "string", Description = "Column name to sort results by. Prefix with '-' for descending." },
["top_n"] = new() { Type = "integer", Description = "Limit results to top N rows. Default: all rows." },
// Output rendering; restricted to the three values in Enum.
["output_format"] = new()
{
Type = "string",
Description = "Output format: table (markdown), csv, json. Default: table",
Enum = ["table", "csv", "json"]
},
},
Required = ["source_path"]
};
/// <summary>
/// Runs the pivot pipeline: load the file, apply the optional filter, group and
/// aggregate, sort, keep the top N rows, and render the result in the requested format.
/// </summary>
/// <param name="args">
/// JSON arguments. <c>source_path</c> is mandatory and must be a string;
/// <c>filter</c>, <c>group_by</c>, <c>aggregates</c>, <c>sort_by</c>, <c>top_n</c>
/// and <c>output_format</c> are optional.
/// </param>
/// <param name="context">Provides the work folder used to resolve the path and the path access check.</param>
/// <param name="ct">Not observed by this method; all work completes synchronously.</param>
/// <returns>
/// An already-completed task holding either the formatted result (prefixed with a
/// row-count summary) or a failure message.
/// </returns>
public Task<ToolResult> ExecuteAsync(JsonElement args, AgentContext context, CancellationToken ct)
{
    // Validate the required argument before touching it: GetProperty throws when the
    // key is absent and GetString throws on a non-string value, and both calls used to
    // sit outside the try block, so the exception escaped instead of becoming a Fail.
    if (args.ValueKind != JsonValueKind.Object
        || !args.TryGetProperty("source_path", out var sourceEl)
        || sourceEl.ValueKind != JsonValueKind.String)
    {
        return Task.FromResult(ToolResult.Fail("source_path 파라미터가 필요합니다."));
    }

    var sourcePath = sourceEl.GetString() ?? "";
    var fullPath = FileReadTool.ResolvePath(sourcePath, context.WorkFolder);
    if (!context.IsPathAllowed(fullPath))
    {
        return Task.FromResult(ToolResult.Fail($"경로 접근 차단: {fullPath}"));
    }
    if (!File.Exists(fullPath))
    {
        return Task.FromResult(ToolResult.Fail($"파일 없음: {fullPath}"));
    }

    try
    {
        // Load the data: ".json" goes through the JSON loader, everything else is treated as CSV.
        var ext = Path.GetExtension(fullPath).ToLowerInvariant();
        List<Dictionary<string, string>> data = ext == ".json"
            ? LoadJson(fullPath)
            : LoadCsv(fullPath);
        if (data.Count == 0)
        {
            return Task.FromResult(ToolResult.Fail("데이터가 비어있습니다."));
        }
        var originalCount = data.Count;

        // Apply the filter (blank filter strings are ignored).
        if (args.TryGetProperty("filter", out var filterEl))
        {
            var filterStr = filterEl.GetString() ?? "";
            if (!string.IsNullOrWhiteSpace(filterStr))
            {
                data = ApplyFilter(data, filterStr);
            }
        }

        // Group and aggregate; without a group_by array the filtered rows pass through unchanged.
        List<Dictionary<string, string>> result;
        if (args.TryGetProperty("group_by", out var groupEl) && groupEl.ValueKind == JsonValueKind.Array)
        {
            var groupCols = new List<string>();
            foreach (var g in groupEl.EnumerateArray())
            {
                groupCols.Add(g.GetString() ?? "");
            }

            var aggregates = new List<(string Column, string Function)>();
            if (args.TryGetProperty("aggregates", out var aggEl) && aggEl.ValueKind == JsonValueKind.Array)
            {
                foreach (var agg in aggEl.EnumerateArray())
                {
                    var col = agg.TryGetProperty("column", out var c) ? c.GetString() ?? "" : "";
                    // A missing function defaults to "count".
                    var func = agg.TryGetProperty("function", out var f) ? f.GetString() ?? "count" : "count";
                    // Specs without a column name are dropped.
                    if (!string.IsNullOrEmpty(col))
                    {
                        aggregates.Add((col, func));
                    }
                }
            }
            result = GroupAndAggregate(data, groupCols, aggregates);
        }
        else
        {
            result = data;
        }

        // Sort.
        if (args.TryGetProperty("sort_by", out var sortEl))
        {
            var sortBy = sortEl.GetString() ?? "";
            if (!string.IsNullOrWhiteSpace(sortBy))
            {
                result = ApplySort(result, sortBy);
            }
        }

        // Top N (non-positive values mean "no limit").
        if (args.TryGetProperty("top_n", out var topEl) && topEl.TryGetInt32(out var topN) && topN > 0)
        {
            result = result.Take(topN).ToList();
        }

        // Output format.
        var outputFormat = args.TryGetProperty("output_format", out var ofmt) ? ofmt.GetString() ?? "table" : "table";
        var output = FormatOutput(result, outputFormat);
        return Task.FromResult(ToolResult.Ok(
            $"📊 데이터 피벗 완료: {originalCount}행 → 필터 후 {data.Count}행 → 결과 {result.Count}행\n\n{output}"));
    }
    catch (Exception ex)
    {
        // Any load/parse/argument error inside the pipeline is reported as a failed tool result.
        return Task.FromResult(ToolResult.Fail($"데이터 피벗 실패: {ex.Message}"));
    }
}
/// <summary>
/// Reads a CSV file into a list of rows keyed by header name (case-insensitive keys).
/// The first line supplies the headers; blank lines are skipped. A row only receives
/// as many columns as it has values, capped at the number of headers.
/// </summary>
/// <param name="path">Path of the CSV file, read through <c>TextFileCodec</c>.</param>
/// <returns>The parsed rows, or an empty list when the file has fewer than two lines.</returns>
private static List<Dictionary<string, string>> LoadCsv(string path)
{
    var lines = TextFileCodec.SplitLines(TextFileCodec.ReadAllText(path).Text);
    if (lines.Length < 2)
    {
        return new();
    }

    var columnNames = ParseCsvLine(lines[0]);
    var rows = new List<Dictionary<string, string>>();
    for (var lineIndex = 1; lineIndex < lines.Length; lineIndex++)
    {
        var line = lines[lineIndex];
        if (string.IsNullOrWhiteSpace(line))
        {
            continue;
        }

        var cells = ParseCsvLine(line);
        var record = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        // Pair headers with cells positionally; surplus headers or cells are ignored.
        var width = Math.Min(columnNames.Count, cells.Count);
        for (var k = 0; k < width; k++)
        {
            record[columnNames[k]] = cells[k];
        }
        rows.Add(record);
    }
    return rows;
}
/// <summary>
/// Splits a single CSV line into fields. Commas inside double-quoted sections do not
/// split, surrounding quotes are removed, and a doubled quote (<c>""</c>) inside a
/// quoted section yields one literal quote character. Each field is trimmed.
/// </summary>
/// <param name="line">One line of CSV text (no embedded line breaks).</param>
/// <returns>The field values in order; always at least one element.</returns>
private static List<string> ParseCsvLine(string line)
{
    var fields = new List<string>();
    var current = new StringBuilder();
    bool inQuote = false;
    for (int i = 0; i < line.Length; i++)
    {
        var c = line[i];
        if (c == '"')
        {
            // Inside a quoted field, "" is an escaped quote rather than close-then-open.
            // Previously both characters were discarded, losing the quote.
            if (inQuote && i + 1 < line.Length && line[i + 1] == '"')
            {
                current.Append('"');
                i++;
                continue;
            }
            inQuote = !inQuote;
            continue;
        }
        if (c == ',' && !inQuote)
        {
            fields.Add(current.ToString().Trim());
            current.Clear();
            continue;
        }
        current.Append(c);
    }
    // The last field has no trailing comma, so flush it here.
    fields.Add(current.ToString().Trim());
    return fields;
}
/// <summary>
/// Reads a JSON file into a list of rows. The rows are taken from the root array, or
/// from the root object's <c>data</c> property when the root is an object. Every
/// property value is stored as its string form under a case-insensitive key.
/// </summary>
/// <param name="path">Path of the JSON file, read through <c>TextFileCodec</c>.</param>
/// <returns>
/// The parsed rows; empty when no array is found. Array items that are not JSON
/// objects are skipped.
/// </returns>
private static List<Dictionary<string, string>> LoadJson(string path)
{
    var json = TextFileCodec.ReadAllText(path).Text;
    // JsonDocument is IDisposable; all values are copied to strings below, so it is
    // safe to release it when this method returns.
    using var doc = JsonDocument.Parse(json);
    var data = new List<Dictionary<string, string>>();

    var root = doc.RootElement;
    var arr = root;
    // TryGetProperty throws on non-object elements, so check the kind first.
    if (root.ValueKind == JsonValueKind.Object && root.TryGetProperty("data", out var wrapped))
    {
        arr = wrapped;
    }
    if (arr.ValueKind != JsonValueKind.Array)
    {
        return data;
    }

    foreach (var item in arr.EnumerateArray())
    {
        // EnumerateObject throws on scalars/arrays; such items cannot form a row.
        if (item.ValueKind != JsonValueKind.Object)
        {
            continue;
        }
        var row = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
        foreach (var prop in item.EnumerateObject())
        {
            row[prop.Name] = prop.Value.ToString();
        }
        data.Add(row);
    }
    return data;
}
// Matches one condition: <column> <operator> <value>. Built once instead of per call.
private static readonly Regex FilterConditionRegex =
    new(@"(\w+)\s*(==|!=|>=|<=|>|<|contains)\s*(.+)", RegexOptions.Compiled);

/// <summary>
/// Keeps the rows that satisfy every condition in <paramref name="filter"/>.
/// Conditions are separated by <c> AND </c> / <c> and </c>. <c>==</c>, <c>!=</c> and
/// <c>contains</c> compare text case-insensitively; <c>&gt;</c>, <c>&lt;</c>,
/// <c>&gt;=</c>, <c>&lt;=</c> compare numerically and reject rows where either side is
/// not a number. Numbers are parsed with the invariant culture so that "1.5" means the
/// same thing on every machine.
/// </summary>
/// <param name="data">Rows to filter; not modified.</param>
/// <param name="filter">Filter expression. Conditions that do not match the expected shape are ignored.</param>
/// <returns>The filtered rows (the input list itself when no condition applied).</returns>
private static List<Dictionary<string, string>> ApplyFilter(List<Dictionary<string, string>> data, string filter)
{
    static bool TryParseNumber(string text, out double number) =>
        double.TryParse(
            text,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out number);

    var conditions = filter.Split(new[] { " AND ", " and " }, StringSplitOptions.TrimEntries);
    var result = data;
    foreach (var cond in conditions)
    {
        var match = FilterConditionRegex.Match(cond);
        if (!match.Success) continue;

        var col = match.Groups[1].Value;
        var op = match.Groups[2].Value;
        // Allow the value to be wrapped in single or double quotes.
        var val = match.Groups[3].Value.Trim().Trim('\'', '"');
        // The literal is the same for every row, so parse it once.
        bool targetIsNumber = TryParseNumber(val, out var target);

        result = result.Where(row =>
        {
            // Rows that lack the column never match.
            if (!row.TryGetValue(col, out var cellVal)) return false;

            switch (op)
            {
                case "==": return cellVal.Equals(val, StringComparison.OrdinalIgnoreCase);
                case "!=": return !cellVal.Equals(val, StringComparison.OrdinalIgnoreCase);
                case "contains": return cellVal.Contains(val, StringComparison.OrdinalIgnoreCase);
            }

            // Remaining operators are numeric; both sides must parse.
            if (!targetIsNumber || !TryParseNumber(cellVal, out var cellNum)) return false;
            return op switch
            {
                ">" => cellNum > target,
                "<" => cellNum < target,
                ">=" => cellNum >= target,
                "<=" => cellNum <= target,
                _ => true
            };
        }).ToList();
    }
    return result;
}
/// <summary>
/// Groups rows by the values of <paramref name="groupCols"/> and computes the requested
/// aggregates for each group. Groups appear in order of first occurrence.
/// </summary>
/// <param name="data">Rows to group.</param>
/// <param name="groupCols">Columns forming the group key; a missing column counts as an empty value.</param>
/// <param name="aggregates">
/// (column, function) pairs. Functions are matched case-insensitively: <c>sum</c>,
/// <c>avg</c>/<c>average</c>, <c>min</c>, <c>max</c> operate on the numeric values of the
/// column (non-numeric cells are ignored; an empty set yields 0) and are written with two
/// decimals; <c>count</c> and any unrecognised function yield the group's row count as an
/// integer. The output column is named <c>{column}_{function}</c> using the function text
/// as given. When the list is empty a plain <c>count</c> column is added.
/// </param>
/// <returns>One row per group containing the group columns followed by the aggregate columns.</returns>
private static List<Dictionary<string, string>> GroupAndAggregate(
    List<Dictionary<string, string>> data,
    List<string> groupCols,
    List<(string Column, string Function)> aggregates)
{
    // Numbers in data files are machine-readable, so parse and format culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    const System.Globalization.NumberStyles numberStyles =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    var groups = data.GroupBy(row =>
    {
        var key = new StringBuilder();
        foreach (var col in groupCols)
        {
            row.TryGetValue(col, out var val);
            val ??= "";
            // Length-prefix each part: a plain separator such as '|' lets ("a|b", "")
            // and ("a", "b|") collapse into the same key.
            key.Append(val.Length).Append(':').Append(val);
        }
        return key.ToString();
    });

    var result = new List<Dictionary<string, string>>();
    foreach (var group in groups)
    {
        var row = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

        // Group key columns, taken from the first row of the group.
        var first = group.First();
        foreach (var col in groupCols)
        {
            row[col] = first.TryGetValue(col, out var v) ? v : "";
        }

        // Aggregate columns.
        foreach (var (aggCol, func) in aggregates)
        {
            var values = new List<double>();
            foreach (var member in group)
            {
                if (member.TryGetValue(aggCol, out var raw)
                    && double.TryParse(raw, numberStyles, invariant, out var number))
                {
                    values.Add(number);
                }
            }

            // Normalise once so dispatch and formatting agree ("COUNT" used to be
            // computed as a count but printed as "2.00").
            var formatted = func.ToLowerInvariant() switch
            {
                "sum" => values.Sum().ToString("F2", invariant),
                "avg" or "average" => (values.Count > 0 ? values.Average() : 0).ToString("F2", invariant),
                "min" => (values.Count > 0 ? values.Min() : 0).ToString("F2", invariant),
                "max" => (values.Count > 0 ? values.Max() : 0).ToString("F2", invariant),
                // "count" and unknown functions: number of rows in the group.
                _ => group.Count().ToString(invariant)
            };
            row[$"{aggCol}_{func}"] = formatted;
        }

        // With no aggregates requested, add a default count column.
        if (aggregates.Count == 0)
        {
            row["count"] = group.Count().ToString();
        }
        result.Add(row);
    }
    return result;
}
// Orders sort keys that may be either double or string: numbers compare numerically and
// come before text; text compares with the current culture.
private static readonly IComparer<object> SortKeyComparer = Comparer<object>.Create(CompareSortKeys);

/// <summary>
/// Returns the rows ordered by one column. Numeric values sort numerically and ahead of
/// non-numeric values; rows missing the column sort as an empty string.
/// </summary>
/// <param name="data">Rows to sort; the input list is not modified.</param>
/// <param name="sortBy">Column name; a leading '-' requests descending order.</param>
/// <returns>A new, stably sorted list.</returns>
private static List<Dictionary<string, string>> ApplySort(List<Dictionary<string, string>> data, string sortBy)
{
    bool desc = sortBy.StartsWith('-');
    var col = sortBy.TrimStart('-');
    // An explicit comparer is required: keys can be double or string, and the default
    // comparer throws when asked to compare a double with a string.
    var ordered = desc
        ? data.OrderByDescending(r => GetSortKey(r, col), SortKeyComparer)
        : data.OrderBy(r => GetSortKey(r, col), SortKeyComparer);
    return ordered.ToList();
}

/// <summary>
/// Produces the sort key for a row: a <see cref="double"/> when the cell parses as a
/// number (invariant culture), otherwise the cell text, or "" when the column is absent.
/// </summary>
private static object GetSortKey(Dictionary<string, string> row, string col)
{
    if (!row.TryGetValue(col, out var val)) return "";
    if (double.TryParse(
            val,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out var num))
    {
        return num;
    }
    return val;
}

/// <summary>Total order over the double/string keys produced by <c>GetSortKey</c>.</summary>
private static int CompareSortKeys(object x, object y) => (x, y) switch
{
    (double a, double b) => a.CompareTo(b),
    (double, _) => -1,
    (_, double) => 1,
    _ => string.Compare(x as string, y as string, StringComparison.CurrentCulture),
};
/// <summary>
/// Renders rows as indented JSON, CSV, or a markdown table (the default for any other
/// format name). The column set is the union of all row keys, compared
/// case-insensitively, in order of first appearance.
/// </summary>
/// <param name="data">Rows to render.</param>
/// <param name="format">"json", "csv" or "table"; matched case-insensitively.</param>
/// <returns>The rendered text, or "(결과 없음)" when there are no rows.</returns>
private static string FormatOutput(List<Dictionary<string, string>> data, string format)
{
    // CSV value: always quoted, embedded quotes doubled.
    static string QuoteCsv(string value) => "\"" + value.Replace("\"", "\"\"") + "\"";

    // CSV header: left bare unless it contains a character that needs quoting.
    static string CsvHeader(string name) =>
        name.IndexOfAny(new[] { ',', '"', '\r', '\n' }) >= 0 ? QuoteCsv(name) : name;

    // Markdown cell: a raw '|' or line break would split the cell or end the table row.
    static string MarkdownCell(string value) =>
        value.Replace("|", "\\|").Replace("\r\n", "<br>").Replace("\n", "<br>").Replace("\r", "<br>");

    if (data.Count == 0) return "(결과 없음)";

    // Row dictionaries use case-insensitive keys, so the column list must as well;
    // otherwise "Name" and "name" would become two columns showing the same values.
    var columns = data.SelectMany(r => r.Keys).Distinct(StringComparer.OrdinalIgnoreCase).ToList();

    switch (format?.Trim().ToLowerInvariant())
    {
        case "json":
            return JsonSerializer.Serialize(data, new JsonSerializerOptions
            {
                WriteIndented = true,
                // Relaxed escaping keeps non-ASCII text readable instead of \uXXXX sequences.
                Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
            });

        case "csv":
        {
            var csv = new StringBuilder();
            csv.AppendLine(string.Join(",", columns.Select(CsvHeader)));
            foreach (var row in data)
            {
                var cells = columns.Select(c => QuoteCsv(row.TryGetValue(c, out var v) ? v : ""));
                csv.AppendLine(string.Join(",", cells));
            }
            return csv.ToString();
        }

        default: // table (markdown)
        {
            var table = new StringBuilder();
            // Header row and separator row.
            table.AppendLine("| " + string.Join(" | ", columns.Select(MarkdownCell)) + " |");
            table.AppendLine("| " + string.Join(" | ", columns.Select(_ => "---")) + " |");
            // Data rows; missing cells render as empty.
            foreach (var row in data)
            {
                var cells = columns.Select(c => MarkdownCell(row.TryGetValue(c, out var v) ? v : ""));
                table.AppendLine("| " + string.Join(" | ", cells) + " |");
            }
            return table.ToString();
        }
    }
}
}