using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Windows;
using Microsoft.Data.Sqlite;

namespace AxCopilot.Services;

/// <summary>
/// Maintains a per-work-folder SQLite index of source files, split into fixed-size line
/// chunks, and answers keyword queries by ranking chunks with a TF-IDF weighted cosine
/// similarity.
/// </summary>
public class CodeIndexService : IDisposable
{
    /// <summary>Fallback file-size limit (in KB) when no positive setting value is available.</summary>
    private const int DefaultMaxFileKb = 500;

    /// <summary>Number of source lines stored per chunk.</summary>
    private const int LinesPerChunk = 50;

    /// <summary>Maximum number of characters of chunk content returned as a preview.</summary>
    private const int PreviewLength = 200;

    /// <summary>Chunks scoring at or below this similarity are dropped from search results.</summary>
    private const double MinScore = 0.01;

    /// <summary>Maximum directory recursion depth used when scanning the work folder.</summary>
    private const int MaxScanDepth = 8;

    private const string SchemaSql =
        " CREATE TABLE IF NOT EXISTS files (\n" +
        " id INTEGER PRIMARY KEY AUTOINCREMENT,\n" +
        " path TEXT NOT NULL UNIQUE,\n" +
        " last_modified TEXT NOT NULL,\n" +
        " file_size INTEGER NOT NULL\n" +
        " );\n" +
        " CREATE TABLE IF NOT EXISTS chunks (\n" +
        " id INTEGER PRIMARY KEY AUTOINCREMENT,\n" +
        " file_id INTEGER NOT NULL,\n" +
        " start_line INTEGER NOT NULL,\n" +
        " end_line INTEGER NOT NULL,\n" +
        " content TEXT NOT NULL,\n" +
        " FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE\n" +
        " );\n" +
        " CREATE TABLE IF NOT EXISTS tokens (\n" +
        " chunk_id INTEGER NOT NULL,\n" +
        " token TEXT NOT NULL,\n" +
        " tf INTEGER NOT NULL,\n" +
        " FOREIGN KEY (chunk_id) REFERENCES chunks(id) ON DELETE CASCADE\n" +
        " );\n" +
        " CREATE TABLE IF NOT EXISTS doc_freq (\n" +
        " token TEXT PRIMARY KEY,\n" +
        " df INTEGER NOT NULL\n" +
        " );\n" +
        " CREATE TABLE IF NOT EXISTS meta (\n" +
        " key TEXT PRIMARY KEY,\n" +
        " value TEXT NOT NULL\n" +
        " );\n" +
        " CREATE INDEX IF NOT EXISTS idx_tokens_chunk ON tokens(chunk_id);\n" +
        " CREATE INDEX IF NOT EXISTS idx_tokens_token ON tokens(token);\n" +
        " CREATE INDEX IF NOT EXISTS idx_chunks_file ON chunks(file_id);";

    // Cached so the pattern is not re-resolved on every Tokenize call.
    private static readonly Regex TokenSeparator = new Regex("[^a-zA-Z0-9가-힣_]+", RegexOptions.Compiled);

    private static readonly HashSet<string> StopWords = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        "the", "is", "at", "of", "on", "and", "or", "not", "in", "to",
        "for", "it", "be", "as", "do", "by", "this", "that", "with", "from",
        "but", "an", "are", "was", "were", "been", "being", "have", "has", "had",
        "if", "else", "then", "than", "so", "no", "yes", "var", "int", "string",
        "void", "null", "new", "return", "get", "set", "public", "private", "class", "static",
        "using", "namespace", "true", "false", "import", "export", "function", "const", "let", "def",
        "self"
    };

    private static readonly HashSet<string> CodeExtensions = new HashSet<string>(StringComparer.OrdinalIgnoreCase)
    {
        ".cs", ".py", ".js", ".ts", ".tsx", ".jsx", ".java", ".cpp", ".c", ".h",
        ".hpp", ".go", ".rs", ".rb", ".php", ".swift", ".kt", ".scala", ".html", ".css",
        ".scss", ".json", ".xml", ".yaml", ".yml", ".md", ".txt", ".sql", ".sh", ".bat",
        ".ps1", ".csproj", ".sln", ".gradle", ".pom"
    };

    private SqliteConnection? _db;
    private string _workFolder = "";
    private bool _indexed;
    private int _totalDocs;

    /// <summary>True when the currently opened index contains at least one chunk.</summary>
    public bool IsIndexed => _indexed;

    /// <summary>Number of chunks in the currently opened index.</summary>
    public int ChunkCount => _totalDocs;

    /// <summary>The open connection; throws if no database has been opened yet.</summary>
    private SqliteConnection Db =>
        _db ?? throw new InvalidOperationException("The index database has not been opened.");

    /// <summary>
    /// Returns the database file path for <paramref name="workFolder"/>: the first 16 hex
    /// characters of the SHA-256 of the lower-cased folder path, stored under
    /// <c>ApplicationData/AxCopilot/index</c>. The directory is created if missing.
    /// </summary>
    private string GetDbPath(string workFolder)
    {
        byte[] hash = SHA256.HashData(Encoding.UTF8.GetBytes(workFolder.ToLowerInvariant()));
        string fileName = Convert.ToHexString(hash).Substring(0, 16);
        string indexDir = Path.Combine(
            Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData),
            "AxCopilot",
            "index");
        Directory.CreateDirectory(indexDir);
        return Path.Combine(indexDir, fileName + ".db");
    }

    /// <summary>
    /// Opens (and creates the schema of) the database for <paramref name="workFolder"/>
    /// unless it is already the active one.
    /// </summary>
    private void EnsureDb(string workFolder)
    {
        if (_db != null && _workFolder == workFolder)
        {
            return;
        }

        // Build the new connection in a local first: if opening or schema creation fails,
        // the service keeps its previous (working) connection instead of a half-initialized one.
        SqliteConnection connection = new SqliteConnection("Data Source=" + GetDbPath(workFolder));
        try
        {
            connection.Open();

            using (SqliteCommand pragma = connection.CreateCommand())
            {
                pragma.CommandText = "PRAGMA journal_mode=WAL; PRAGMA synchronous=NORMAL;";
                pragma.ExecuteNonQuery();
            }

            using (SqliteCommand schema = connection.CreateCommand())
            {
                schema.CommandText = SchemaSql;
                schema.ExecuteNonQuery();
            }
        }
        catch
        {
            connection.Dispose();
            throw;
        }

        _db?.Dispose();
        _db = connection;
        _workFolder = workFolder;
    }

    /// <summary>
    /// Incrementally (re)indexes <paramref name="workFolder"/> on a thread-pool thread:
    /// files missing from disk are removed, new or changed files are re-chunked, and the
    /// document-frequency table is rebuilt. Does nothing when the folder is empty/null or
    /// does not exist.
    /// </summary>
    /// <param name="workFolder">Root folder to index.</param>
    /// <param name="ct">Checked between files; remaining files are skipped once cancelled.</param>
    public async Task IndexAsync(string workFolder, CancellationToken ct = default(CancellationToken))
    {
        if (string.IsNullOrEmpty(workFolder) || !Directory.Exists(workFolder))
        {
            return;
        }

        EnsureDb(workFolder);
        await Task.Run(() => RunIndexing(workFolder, ct), ct);
    }

    /// <summary>Synchronous body of <see cref="IndexAsync"/>.</summary>
    private void RunIndexing(string workFolder, CancellationToken ct)
    {
        Dictionary<string, int> indexedFiles = LoadExistingFiles();
        Dictionary<string, FileInfo> filesOnDisk = ScanFiles(workFolder, ResolveMaxFileKb());

        int added = 0;
        int updated = 0;
        int removed = 0;

        foreach (var (indexedPath, indexedId) in indexedFiles)
        {
            if (!filesOnDisk.ContainsKey(indexedPath))
            {
                RemoveFileFromIndex(indexedId);
                removed++;
            }
        }

        foreach (var (relPath, info) in filesOnDisk)
        {
            if (ct.IsCancellationRequested)
            {
                break;
            }

            string lastMod = info.LastWriteTimeUtc.ToString("O");
            long size = info.Length;

            if (indexedFiles.TryGetValue(relPath, out int existingId))
            {
                if (!IsFileChanged(existingId, lastMod, size))
                {
                    continue;
                }

                RemoveFileFromIndex(existingId);
                updated++;
            }
            else
            {
                added++;
            }

            IndexFile(workFolder, relPath, lastMod, size);
        }

        RebuildDocFreq();
        _totalDocs = GetTotalChunkCount();
        _indexed = _totalDocs > 0;
        SaveMeta("lastIndexed", DateTime.UtcNow.ToString("O"));
        SaveMeta("workFolder", workFolder);
        LogService.Info($"코드 인덱싱 완료: {_totalDocs}개 청크 (추가:{added} 갱신:{updated} 삭제:{removed}) [{workFolder}]");
    }

    /// <summary>
    /// Reads the per-file size limit (KB) from the application settings, falling back to
    /// <see cref="DefaultMaxFileKb"/> when it is unavailable or not positive.
    /// </summary>
    private static int ResolveMaxFileKb()
    {
        try
        {
            int configured =
                (Application.Current as App)?.SettingsService?.Settings.Llm.Code.CodeIndexMaxFileKb
                ?? DefaultMaxFileKb;
            return configured > 0 ? configured : DefaultMaxFileKb;
        }
        catch (Exception)
        {
            // Deliberately best-effort: any failure reading settings falls back to the default.
            return DefaultMaxFileKb;
        }
    }

    /// <summary>Loads the indexed files as a case-insensitive map of relative path to file id.</summary>
    private Dictionary<string, int> LoadExistingFiles()
    {
        Dictionary<string, int> files = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        using SqliteCommand command = Db.CreateCommand();
        command.CommandText = "SELECT id, path FROM files";

        using SqliteDataReader reader = command.ExecuteReader();
        while (reader.Read())
        {
            files[reader.GetString(1)] = reader.GetInt32(0);
        }

        return files;
    }

    /// <summary>
    /// Enumerates files under <paramref name="workFolder"/> (up to <see cref="MaxScanDepth"/>
    /// levels deep) whose extension is in <see cref="CodeExtensions"/> and whose size does not
    /// exceed <paramref name="maxFileKb"/> KB, keyed by path relative to the work folder.
    /// </summary>
    private Dictionary<string, FileInfo> ScanFiles(string workFolder, int maxFileKb = DefaultMaxFileKb)
    {
        Dictionary<string, FileInfo> found = new Dictionary<string, FileInfo>(StringComparer.OrdinalIgnoreCase);
        long maxBytes = (long)maxFileKb * 1024L;

        try
        {
            EnumerationOptions options = new EnumerationOptions
            {
                RecurseSubdirectories = true,
                IgnoreInaccessible = true,
                MaxRecursionDepth = MaxScanDepth
            };

            foreach (string fullPath in Directory.EnumerateFiles(workFolder, "*.*", options))
            {
                if (!CodeExtensions.Contains(Path.GetExtension(fullPath)))
                {
                    continue;
                }

                try
                {
                    FileInfo info = new FileInfo(fullPath);
                    if (info.Length <= maxBytes)
                    {
                        found[Path.GetRelativePath(workFolder, fullPath)] = info;
                    }
                }
                catch (Exception)
                {
                    // Best-effort: a file that cannot be inspected is simply skipped.
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort: keep whatever was collected, but leave a trace that the scan was cut short.
            LogService.Info($"코드 파일 스캔 중단: {ex.Message} [{workFolder}]");
        }

        return found;
    }

    /// <summary>
    /// Returns true when the stored last-modified stamp or size of file <paramref name="fileId"/>
    /// differs from the given values, or when the file row no longer exists.
    /// </summary>
    private bool IsFileChanged(int fileId, string lastMod, long size)
    {
        using SqliteCommand command = Db.CreateCommand();
        command.CommandText = "SELECT last_modified, file_size FROM files WHERE id = @id";
        command.Parameters.AddWithValue("@id", fileId);

        using SqliteDataReader reader = command.ExecuteReader();
        if (!reader.Read())
        {
            return true;
        }

        return reader.GetString(0) != lastMod || reader.GetInt64(1) != size;
    }

    /// <summary>
    /// Deletes file <paramref name="fileId"/> together with its chunks and their tokens,
    /// atomically.
    /// </summary>
    private void RemoveFileFromIndex(int fileId)
    {
        SqliteConnection db = Db;

        using SqliteTransaction transaction = db.BeginTransaction();
        using SqliteCommand command = db.CreateCommand();
        command.Transaction = transaction;

        // Tokens are removed via a sub-select so no per-chunk round trip is needed.
        command.CommandText =
            "DELETE FROM tokens WHERE chunk_id IN (SELECT id FROM chunks WHERE file_id = @fid); " +
            "DELETE FROM chunks WHERE file_id = @fid; " +
            "DELETE FROM files WHERE id = @fid;";
        command.Parameters.AddWithValue("@fid", fileId);
        command.ExecuteNonQuery();

        transaction.Commit();
    }

    /// <summary>
    /// Reads the file as UTF-8, splits it into chunks of <see cref="LinesPerChunk"/> lines and
    /// stores the file row, the chunks and their token frequencies in a single transaction.
    /// Whitespace-only chunks are skipped. Any failure rolls the whole file back and is logged;
    /// it does not propagate to the caller.
    /// </summary>
    private void IndexFile(string workFolder, string relPath, string lastMod, long size)
    {
        try
        {
            string content = File.ReadAllText(Path.Combine(workFolder, relPath), Encoding.UTF8);
            string[] lines = content.Split('\n');

            SqliteConnection db = Db;
            using SqliteTransaction transaction = db.BeginTransaction();

            // The file row is part of the transaction so that a failure while chunking cannot
            // leave behind a row that looks up to date but has no chunks.
            int fileId;
            using (SqliteCommand insertFile = db.CreateCommand())
            {
                insertFile.Transaction = transaction;
                insertFile.CommandText = "INSERT INTO files (path, last_modified, file_size) VALUES (@p, @m, @s) RETURNING id";
                insertFile.Parameters.AddWithValue("@p", relPath);
                insertFile.Parameters.AddWithValue("@m", lastMod);
                insertFile.Parameters.AddWithValue("@s", size);
                fileId = Convert.ToInt32(insertFile.ExecuteScalar());
            }

            // Both insert commands are created once and re-executed with new parameter values.
            using SqliteCommand insertChunk = db.CreateCommand();
            insertChunk.Transaction = transaction;
            insertChunk.CommandText = "INSERT INTO chunks (file_id, start_line, end_line, content) VALUES (@f, @s, @e, @c) RETURNING id";
            insertChunk.Parameters.AddWithValue("@f", fileId);
            SqliteParameter startLineParam = insertChunk.Parameters.Add("@s", SqliteType.Integer);
            SqliteParameter endLineParam = insertChunk.Parameters.Add("@e", SqliteType.Integer);
            SqliteParameter contentParam = insertChunk.Parameters.Add("@c", SqliteType.Text);

            using SqliteCommand insertToken = db.CreateCommand();
            insertToken.Transaction = transaction;
            insertToken.CommandText = "INSERT INTO tokens (chunk_id, token, tf) VALUES (@cid, @t, @tf)";
            SqliteParameter chunkIdParam = insertToken.Parameters.Add("@cid", SqliteType.Integer);
            SqliteParameter tokenParam = insertToken.Parameters.Add("@t", SqliteType.Text);
            SqliteParameter tfParam = insertToken.Parameters.Add("@tf", SqliteType.Integer);

            for (int start = 0; start < lines.Length; start += LinesPerChunk)
            {
                int count = Math.Min(LinesPerChunk, lines.Length - start);
                string chunkText = string.Join("\n", lines, start, count);
                if (string.IsNullOrWhiteSpace(chunkText))
                {
                    continue;
                }

                startLineParam.Value = start + 1; // stored line numbers are 1-based
                endLineParam.Value = start + count;
                contentParam.Value = chunkText;
                int chunkId = Convert.ToInt32(insertChunk.ExecuteScalar());

                chunkIdParam.Value = chunkId;
                foreach (var (token, frequency) in Tokenize(chunkText))
                {
                    tokenParam.Value = token;
                    tfParam.Value = frequency;
                    insertToken.ExecuteNonQuery();
                }
            }

            transaction.Commit();
        }
        catch (Exception ex)
        {
            // Best-effort per file: the uncommitted transaction is rolled back on dispose, so the
            // file stays absent from the index and is retried on the next indexing run.
            LogService.Info($"코드 인덱싱 실패(파일 건너뜀): {relPath} - {ex.Message}");
        }
    }

    /// <summary>
    /// Recomputes <c>doc_freq</c> (number of distinct chunks containing each token) from the
    /// <c>tokens</c> table, atomically.
    /// </summary>
    private void RebuildDocFreq()
    {
        SqliteConnection db = Db;

        using SqliteTransaction transaction = db.BeginTransaction();
        using SqliteCommand command = db.CreateCommand();
        command.Transaction = transaction;
        command.CommandText =
            " DELETE FROM doc_freq;\n" +
            " INSERT INTO doc_freq (token, df)\n" +
            " SELECT token, COUNT(DISTINCT chunk_id) FROM tokens GROUP BY token;";
        command.ExecuteNonQuery();

        transaction.Commit();
    }

    /// <summary>Returns the number of rows in the <c>chunks</c> table.</summary>
    private int GetTotalChunkCount()
    {
        using SqliteCommand command = Db.CreateCommand();
        command.CommandText = "SELECT COUNT(*) FROM chunks";
        return Convert.ToInt32(command.ExecuteScalar());
    }

    /// <summary>Inserts or replaces a key/value pair in the <c>meta</c> table.</summary>
    private void SaveMeta(string key, string value)
    {
        using SqliteCommand command = Db.CreateCommand();
        command.CommandText = "INSERT OR REPLACE INTO meta (key, value) VALUES (@k, @v)";
        command.Parameters.AddWithValue("@k", key);
        command.Parameters.AddWithValue("@v", value);
        command.ExecuteNonQuery();
    }

    /// <summary>
    /// Ranks indexed chunks against <paramref name="query"/> and returns the best matches.
    /// </summary>
    /// <param name="query">Free-text query; tokenized the same way as indexed content.</param>
    /// <param name="maxResults">Maximum number of results to return.</param>
    /// <returns>
    /// Matching chunks in descending score order; empty when nothing is indexed, the query is
    /// null/blank or yields no tokens, or no chunk scores above <see cref="MinScore"/>.
    /// </returns>
    public List<SearchResult> Search(string query, int maxResults = 5)
    {
        SqliteConnection? db = _db;
        if (!_indexed || db == null || _totalDocs == 0 || string.IsNullOrWhiteSpace(query))
        {
            return new List<SearchResult>();
        }

        Dictionary<string, int> queryTf = Tokenize(query);
        if (queryTf.Count == 0)
        {
            return new List<SearchResult>();
        }

        Dictionary<string, int> docFrequencies = LoadDocFrequencies(db, queryTf.Keys);
        HashSet<int> candidates = FindCandidateChunks(db, queryTf.Keys);
        if (candidates.Count == 0)
        {
            return new List<SearchResult>();
        }

        List<(int ChunkId, double Score)> scored = new List<(int ChunkId, double Score)>();
        foreach (int chunkId in candidates)
        {
            Dictionary<string, int> chunkTf = LoadChunkTermFrequencies(db, chunkId);
            double score = ComputeTfIdfSimilarity(queryTf, chunkTf, docFrequencies);
            if (score > MinScore)
            {
                scored.Add((chunkId, score));
            }
        }

        List<SearchResult> results = new List<SearchResult>();
        foreach (var (chunkId, score) in scored.OrderByDescending(s => s.Score).Take(maxResults))
        {
            AppendResult(db, chunkId, score, results);
        }

        return results;
    }

    /// <summary>Looks up the document frequency of each token; tokens without a row are omitted.</summary>
    private static Dictionary<string, int> LoadDocFrequencies(SqliteConnection db, IEnumerable<string> tokens)
    {
        Dictionary<string, int> frequencies = new Dictionary<string, int>();

        foreach (string token in tokens)
        {
            using SqliteCommand command = db.CreateCommand();
            command.CommandText = "SELECT df FROM doc_freq WHERE token = @t";
            command.Parameters.AddWithValue("@t", token);

            object? df = command.ExecuteScalar();
            if (df != null)
            {
                frequencies[token] = Convert.ToInt32(df);
            }
        }

        return frequencies;
    }

    /// <summary>Returns the ids of all chunks containing at least one of the tokens.</summary>
    private static HashSet<int> FindCandidateChunks(SqliteConnection db, IEnumerable<string> tokens)
    {
        HashSet<int> chunkIds = new HashSet<int>();

        foreach (string token in tokens)
        {
            using SqliteCommand command = db.CreateCommand();
            command.CommandText = "SELECT DISTINCT chunk_id FROM tokens WHERE token = @t";
            command.Parameters.AddWithValue("@t", token);

            using SqliteDataReader reader = command.ExecuteReader();
            while (reader.Read())
            {
                chunkIds.Add(reader.GetInt32(0));
            }
        }

        return chunkIds;
    }

    /// <summary>Loads the token-to-frequency map stored for one chunk.</summary>
    private static Dictionary<string, int> LoadChunkTermFrequencies(SqliteConnection db, int chunkId)
    {
        Dictionary<string, int> termFrequencies = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        using SqliteCommand command = db.CreateCommand();
        command.CommandText = "SELECT token, tf FROM tokens WHERE chunk_id = @cid";
        command.Parameters.AddWithValue("@cid", chunkId);

        using SqliteDataReader reader = command.ExecuteReader();
        while (reader.Read())
        {
            termFrequencies[reader.GetString(0)] = reader.GetInt32(1);
        }

        return termFrequencies;
    }

    /// <summary>
    /// Loads the location and content of chunk <paramref name="chunkId"/> and appends it to
    /// <paramref name="results"/>; content longer than <see cref="PreviewLength"/> characters
    /// is truncated and suffixed with "...". Nothing is appended if the chunk row is missing.
    /// </summary>
    private static void AppendResult(SqliteConnection db, int chunkId, double score, List<SearchResult> results)
    {
        using SqliteCommand command = db.CreateCommand();
        command.CommandText =
            " SELECT f.path, c.start_line, c.end_line, c.content\n" +
            " FROM chunks c JOIN files f ON c.file_id = f.id\n" +
            " WHERE c.id = @cid";
        command.Parameters.AddWithValue("@cid", chunkId);

        using SqliteDataReader reader = command.ExecuteReader();
        if (!reader.Read())
        {
            return;
        }

        string content = reader.GetString(3);
        results.Add(new SearchResult
        {
            FilePath = reader.GetString(0),
            StartLine = reader.GetInt32(1),
            EndLine = reader.GetInt32(2),
            Score = score,
            Preview = content.Length > PreviewLength
                ? content.Substring(0, PreviewLength) + "..."
                : content
        });
    }

    /// <summary>
    /// Opens the existing index database for <paramref name="workFolder"/>, if its file exists,
    /// and refreshes <see cref="ChunkCount"/> / <see cref="IsIndexed"/> from it. Does nothing
    /// when the folder is empty/null, does not exist, or has no database file yet.
    /// </summary>
    public void TryLoadExisting(string workFolder)
    {
        if (string.IsNullOrEmpty(workFolder) || !Directory.Exists(workFolder))
        {
            return;
        }

        if (!File.Exists(GetDbPath(workFolder)))
        {
            return;
        }

        EnsureDb(workFolder);
        _totalDocs = GetTotalChunkCount();
        _indexed = _totalDocs > 0;
        if (_indexed)
        {
            LogService.Info($"기존 코드 인덱스 로드: {_totalDocs}개 청크 [{workFolder}]");
        }
    }

    /// <summary>
    /// Cosine similarity between the query and a chunk, with each term weighted by
    /// tf * idf. The dot product only covers query terms; the chunk norm covers all of the
    /// chunk's terms. Returns 0 when either vector has zero length.
    /// </summary>
    private double ComputeTfIdfSimilarity(
        Dictionary<string, int> queryTf,
        Dictionary<string, int> docTf,
        Dictionary<string, int> dfMap)
    {
        double dot = 0.0;
        double queryNormSq = 0.0;
        double docNormSq = 0.0;

        foreach (var (term, queryCount) in queryTf)
        {
            double idf = InverseDocumentFrequency(dfMap.GetValueOrDefault(term, 0));
            double queryWeight = queryCount * idf;
            queryNormSq += queryWeight * queryWeight;

            if (docTf.TryGetValue(term, out int docCount))
            {
                dot += queryWeight * (docCount * idf);
            }
        }

        foreach (var (term, docCount) in docTf)
        {
            double docWeight = docCount * InverseDocumentFrequency(dfMap.GetValueOrDefault(term, 0));
            docNormSq += docWeight * docWeight;
        }

        if (queryNormSq == 0.0 || docNormSq == 0.0)
        {
            return 0.0;
        }

        return dot / (Math.Sqrt(queryNormSq) * Math.Sqrt(docNormSq));
    }

    /// <summary>Smoothed IDF: ln(1 + totalDocs / (1 + df)).</summary>
    private double InverseDocumentFrequency(int documentFrequency)
    {
        return Math.Log(1.0 + (double)_totalDocs / (1.0 + (double)documentFrequency));
    }

    /// <summary>
    /// Splits <paramref name="text"/> on non-word characters and camel-case boundaries, drops
    /// words shorter than two characters and stop words, lower-cases the rest, and counts both
    /// the single words and every adjacent pair joined with "_".
    /// </summary>
    private static Dictionary<string, int> Tokenize(string text)
    {
        Dictionary<string, int> counts = new Dictionary<string, int>(StringComparer.OrdinalIgnoreCase);

        // Materialized once: the word list is walked twice (unigrams, then bigrams).
        List<string> words = TokenSeparator.Split(text)
            .SelectMany(SplitCamelCase)
            .Where(w => w.Length >= 2 && !StopWords.Contains(w))
            .Select(w => w.ToLowerInvariant())
            .ToList();

        foreach (string word in words)
        {
            counts.TryGetValue(word, out int seen);
            counts[word] = seen + 1;
        }

        for (int i = 0; i < words.Count - 1; i++)
        {
            string bigram = words[i] + "_" + words[i + 1];
            counts.TryGetValue(bigram, out int seen);
            counts[bigram] = seen + 1;
        }

        return counts;
    }

    /// <summary>
    /// Yields the pieces of <paramref name="word"/>, starting a new piece at every upper-case
    /// character that is not the first character. Yields nothing for a null or empty word.
    /// </summary>
    private static IEnumerable<string> SplitCamelCase(string word)
    {
        if (string.IsNullOrEmpty(word))
        {
            yield break;
        }

        StringBuilder piece = new StringBuilder();
        foreach (char ch in word)
        {
            if (char.IsUpper(ch) && piece.Length > 0)
            {
                yield return piece.ToString();
                piece.Clear();
            }

            piece.Append(ch);
        }

        if (piece.Length > 0)
        {
            yield return piece.ToString();
        }
    }

    /// <summary>Closes the database connection, if one is open.</summary>
    public void Dispose()
    {
        _db?.Dispose();
        _db = null;
    }
}