using System;
using System.IO;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace AxCopilot.Services.Agent;

/// <summary>
/// Utility for detecting the encoding of text files and reading/writing them.
/// - Reading: automatic detection based on the BOM plus a UTF-8 well-formedness check.
/// - Writing: preserves the existing file's encoding and UTF-8 BOM presence where possible.
/// </summary>
public static class TextFileCodec
{
    /// <summary>Result of decoding a text file.</summary>
    /// <param name="Text">Decoded text, with any BOM removed.</param>
    /// <param name="Encoding">Encoding that was used to decode the bytes.</param>
    /// <param name="HasBom">True when the input started with a recognised byte order mark.</param>
    public readonly record struct TextReadResult(string Text, Encoding Encoding, bool HasBom);

    static TextFileCodec()
    {
        // Makes code-page encodings (such as "euc-kr") resolvable through Encoding.GetEncoding.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
    }

    /// <summary>Reads the whole file and decodes it with the detected encoding.</summary>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>The decoded text together with the detected encoding and BOM flag.</returns>
    public static TextReadResult ReadAllText(string path)
    {
        var bytes = File.ReadAllBytes(path);
        return Decode(bytes);
    }

    /// <summary>Asynchronously reads the whole file and decodes it with the detected encoding.</summary>
    /// <param name="path">Path of the file to read.</param>
    /// <param name="ct">Token used to cancel the read.</param>
    /// <returns>The decoded text together with the detected encoding and BOM flag.</returns>
    public static async Task<TextReadResult> ReadAllTextAsync(string path, CancellationToken ct = default)
    {
        var bytes = await File.ReadAllBytesAsync(path, ct);
        return Decode(bytes);
    }

    /// <summary>
    /// Splits text on '\n' only. A '\r' preceding the '\n' is left at the end of
    /// the line, so CRLF input yields lines that still end with '\r'.
    /// </summary>
    /// <param name="text">Text to split.</param>
    /// <returns>The segments between '\n' characters.</returns>
    public static string[] SplitLines(string text) => text.Split('\n');

    /// <summary>
    /// Chooses the encoding to write with, given what was detected when reading.
    /// For UTF-8 a fresh encoder is returned whose BOM emission matches
    /// <paramref name="sourceHasBom"/>; any other encoding is returned unchanged.
    /// </summary>
    /// <param name="sourceEncoding">Encoding detected for the source file.</param>
    /// <param name="sourceHasBom">Whether the source file started with a BOM.</param>
    /// <returns>The encoding to pass to <see cref="WriteAllTextAsync"/>.</returns>
    public static Encoding ResolveWriteEncoding(Encoding sourceEncoding, bool sourceHasBom)
    {
        if (sourceEncoding.CodePage == Encoding.UTF8.CodePage)
            return new UTF8Encoding(sourceHasBom);

        return sourceEncoding;
    }

    /// <summary>
    /// Writes <paramref name="content"/> to <paramref name="path"/> using
    /// <paramref name="encoding"/>, creating the parent directory when the path has one.
    /// An existing file is overwritten.
    /// </summary>
    /// <param name="path">Destination file path.</param>
    /// <param name="content">Text to write.</param>
    /// <param name="encoding">Encoding to write with; its preamble (if any) is emitted by the writer.</param>
    /// <param name="ct">Token used to cancel the write.</param>
    public static async Task WriteAllTextAsync(string path, string content, Encoding encoding, CancellationToken ct = default)
    {
        var dir = Path.GetDirectoryName(path);
        if (!string.IsNullOrWhiteSpace(dir))
            Directory.CreateDirectory(dir);

        await using var stream = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.Read, 4096, useAsync: true);
        await using var writer = new StreamWriter(stream, encoding);
        await writer.WriteAsync(content.AsMemory(), ct);
        await writer.FlushAsync();
    }

    /// <summary>A UTF-8 encoding that does not emit a BOM.</summary>
    public static Encoding Utf8NoBom => new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

    /// <summary>Detects the encoding of <paramref name="bytes"/> and decodes everything after the BOM.</summary>
    private static TextReadResult Decode(byte[] bytes)
    {
        var detected = DetectEncoding(bytes, out var bomLength, out var hasBom);

        // Decode in place from the BOM offset instead of copying the payload into a new array.
        var text = detected.GetString(bytes, bomLength, bytes.Length - bomLength);
        return new TextReadResult(text, detected, hasBom);
    }

    /// <summary>
    /// Picks an encoding for <paramref name="bytes"/>: a recognised BOM wins, then
    /// well-formed UTF-8 (no BOM), then the "euc-kr" legacy fallback.
    /// </summary>
    /// <param name="bytes">Raw file content.</param>
    /// <param name="bomLength">Number of leading BOM bytes to skip (0 when there is no BOM).</param>
    /// <param name="hasBom">True when a BOM was recognised.</param>
    private static Encoding DetectEncoding(byte[] bytes, out int bomLength, out bool hasBom)
    {
        // UTF-8 BOM
        if (bytes.Length >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF)
        {
            bomLength = 3;
            hasBom = true;
            return Encoding.UTF8;
        }

        // UTF-32 LE BOM. Must be tested before UTF-16 LE because it starts with the same FF FE pair.
        if (bytes.Length >= 4 && bytes[0] == 0xFF && bytes[1] == 0xFE && bytes[2] == 0x00 && bytes[3] == 0x00)
        {
            bomLength = 4;
            hasBom = true;
            return Encoding.UTF32;
        }

        // UTF-32 BE BOM
        if (bytes.Length >= 4 && bytes[0] == 0x00 && bytes[1] == 0x00 && bytes[2] == 0xFE && bytes[3] == 0xFF)
        {
            bomLength = 4;
            hasBom = true;
            return new UTF32Encoding(bigEndian: true, byteOrderMark: true);
        }

        // UTF-16 LE BOM
        if (bytes.Length >= 2 && bytes[0] == 0xFF && bytes[1] == 0xFE)
        {
            bomLength = 2;
            hasBom = true;
            return Encoding.Unicode;
        }

        // UTF-16 BE BOM
        if (bytes.Length >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF)
        {
            bomLength = 2;
            hasBom = true;
            return Encoding.BigEndianUnicode;
        }

        bomLength = 0;
        hasBom = false;

        if (IsValidUtf8(bytes))
            return new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);

        // Compatibility fallback for Korean Windows environments.
        try
        {
            return Encoding.GetEncoding("euc-kr");
        }
        catch (Exception ex) when (ex is ArgumentException or NotSupportedException)
        {
            // The code page is unavailable on this platform; fall back to the default encoding.
            return Encoding.Default;
        }
    }

    /// <summary>
    /// Returns true when <paramref name="bytes"/> is entirely well-formed UTF-8.
    /// Overlong forms (C0/C1 leads, E0 80..9F, F0 80..8F), encoded surrogates
    /// (ED A0..BF) and values above U+10FFFF are rejected so that legacy
    /// double-byte text is not mistaken for UTF-8. Empty input is valid.
    /// </summary>
    private static bool IsValidUtf8(byte[] bytes)
    {
        var i = 0;
        while (i < bytes.Length)
        {
            var lead = bytes[i];
            if (lead <= 0x7F)
            {
                i++;
                continue;
            }

            int extra;
            // Allowed range of the first continuation byte; narrowed for the
            // lead bytes that would otherwise admit ill-formed sequences.
            byte secondMin = 0x80;
            byte secondMax = 0xBF;

            if (lead >= 0xC2 && lead <= 0xDF)
            {
                extra = 1;
            }
            else if (lead >= 0xE0 && lead <= 0xEF)
            {
                extra = 2;
                if (lead == 0xE0) secondMin = 0xA0;       // below A0 would be an overlong form
                else if (lead == 0xED) secondMax = 0x9F;  // above 9F would encode a surrogate
            }
            else if (lead >= 0xF0 && lead <= 0xF4)
            {
                extra = 3;
                if (lead == 0xF0) secondMin = 0x90;       // below 90 would be an overlong form
                else if (lead == 0xF4) secondMax = 0x8F;  // above 8F would exceed U+10FFFF
            }
            else
            {
                // Stray continuation byte, C0/C1, or F5..FF.
                return false;
            }

            // i + extra is the index of the last continuation byte; it must exist.
            if (i + extra >= bytes.Length)
                return false;

            var second = bytes[i + 1];
            if (second < secondMin || second > secondMax)
                return false;

            for (var j = 2; j <= extra; j++)
            {
                if ((bytes[i + j] & 0xC0) != 0x80)
                    return false;
            }

            i += extra + 1;
        }

        return true;
    }
}