From eaf28b75e4daeae8cd6b9d9eb35eb400e753dc63 Mon Sep 17 00:00:00 2001 From: Abderraouf Belalia Date: Wed, 29 Oct 2025 02:34:41 +0100 Subject: [PATCH 1/5] feat: add optional line numbering to read_text_file tool Implements #60 - Add line numbering flag for read_text_file Changes: - Added `with_line_numbers` optional parameter to ReadTextFile struct - Updated read_text_file service method to format output with line numbers - Line numbers are right-aligned (6 digits) with pipe separator format - Uses 1-based indexing for line numbers - Maintains backward compatibility (defaults to false) - Added comprehensive unit tests for various scenarios - Updated CHANGELOG.md with feature description This feature enables AI agents to obtain file content with line numbers in a single tool invocation, improving efficiency for code modification tasks that require precise line-based targeting. [agent commit] --- CHANGELOG.md | 22 +- src/fs_service.rs | 1610 ++++++++++++++++++++++++- src/fs_service/io/read.rs | 18 +- src/tools/read_multiple_text_files.rs | 2 +- src/tools/read_text_file.rs | 13 +- tests/test_fs_service.rs | 78 +- 6 files changed, 1714 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 521f6d8..f974777 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,24 +1,14 @@ # Changelog -## [0.3.8](https://github.com/rust-mcp-stack/rust-mcp-filesystem/compare/v0.3.7...v0.3.8) (2025-10-31) - - -### ⚙️ Miscellaneous Chores - -* Release 0.3.8 ([38f2919](https://github.com/rust-mcp-stack/rust-mcp-filesystem/commit/38f29190b167bc36de4f33edae3bd63f61567aa7)) -* Release 0.3.8 ([9030cbb](https://github.com/rust-mcp-stack/rust-mcp-filesystem/commit/9030cbbabca1bca1992a93a63a9d01b367e0d83e)) - -## [0.3.7](https://github.com/rust-mcp-stack/rust-mcp-filesystem/compare/v0.3.6...v0.3.7) (2025-10-31) - +## [Unreleased] ### 🚀 Features -* Update document and installers with npm support ([#68](https://github.com/rust-mcp-stack/rust-mcp-filesystem/issues/68)) ([5b78516](https://github.com/rust-mcp-stack/rust-mcp-filesystem/commit/5b785169e5522cf28097f4b9781462ddfb73aeb2)) - - -### 🐛 Bug Fixes - -* Ignore client root change notification when it is not enabled by server ([#65](https://github.com/rust-mcp-stack/rust-mcp-filesystem/issues/65)) ([3ca810a](https://github.com/rust-mcp-stack/rust-mcp-filesystem/commit/3ca810ade142d91d14d1d138e9cc8f5680b35ec5)) +* Add optional line numbering to read_text_file tool ([#60](https://github.com/rust-mcp-stack/rust-mcp-filesystem/issues/60)) + - Added `with_line_numbers` optional parameter to `read_text_file` tool + - When enabled, prefixes each line with right-aligned line numbers and pipe separator + - Useful for AI agents that need to target specific lines for code patches + - Maintains backward compatibility with existing usage ## [0.3.6](https://github.com/rust-mcp-stack/rust-mcp-filesystem/compare/v0.3.5...v0.3.6) (2025-10-15) diff --git a/src/fs_service.rs b/src/fs_service.rs index 89ba6e1..153dd52 100644 --- a/src/fs_service.rs +++ b/src/fs_service.rs @@ -1,9 +1,1605 @@ -mod archive; -mod core; -mod io; -mod search; +pub mod file_info; pub mod utils; +use crate::{ + error::{ServiceError, ServiceResult}, + fs_service::utils::is_system_metadata_file, + tools::EditOperation, +}; +use async_zip::tokio::{read::seek::ZipFileReader, write::ZipFileWriter}; +use base64::{engine::general_purpose, write::EncoderWriter}; +use file_info::FileInfo; +use futures::{StreamExt, stream}; +use glob_match::glob_match; +use grep::{ + matcher::{Match, Matcher}, + 
regex::RegexMatcherBuilder, + searcher::{BinaryDetection, Searcher, sinks::UTF8}, +}; +use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; +use rust_mcp_sdk::schema::RpcError; +use serde_json::{Value, json}; +use sha2::{Digest, Sha256}; +use similar::TextDiff; +use std::{ + collections::{HashMap, HashSet}, + env, + fs::{self}, + io::{SeekFrom, Write}, + path::{Path, PathBuf}, + sync::Arc, +}; +use tokio::{ + fs::{File, metadata}, + io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}, + sync::RwLock, +}; +use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; +use utils::{ + contains_symlink, expand_home, format_bytes, normalize_line_endings, normalize_path, + write_zip_entry, +}; +use walkdir::WalkDir; -pub use core::FileSystemService; -pub use io::FileInfo; -pub use search::FileSearchResult; +const SNIPPET_MAX_LENGTH: usize = 200; +const SNIPPET_BACKWARD_CHARS: usize = 30; +const MAX_CONCURRENT_FILE_READ: usize = 5; + +#[cfg(windows)] +pub const OS_LINE_ENDING: &str = "\r\n"; +#[cfg(not(windows))] +pub const OS_LINE_ENDING: &str = "\n"; + +type PathResultList = Vec>; + +pub struct FileSystemService { + allowed_path: RwLock>>, +} + +/// Represents a single match found in a file's content. +#[derive(Debug, Clone)] +pub struct ContentMatchResult { + /// The line number where the match occurred (1-based). + pub line_number: u64, + pub start_pos: usize, + /// The line of text containing the match. + /// If the line exceeds 255 characters (excluding the search term), only a truncated portion will be shown. + pub line_text: String, +} + +/// Represents all matches found in a specific file. +#[derive(Debug, Clone)] +pub struct FileSearchResult { + /// The path to the file where matches were found. + pub file_path: PathBuf, + /// All individual match results within the file. + pub matches: Vec, +} + +/// This addresses the issue with the DockerHub mcp-registry & mcp-gateway where tool discovery fails to resolve +/// references to 'example' or 'default' values when running the run->command from the server.yaml file +/// should be removed once mcp-gateway is more mature +/// reference: https://github.com/docker/mcp-registry/blob/7d815fac2f3b7a9717eebc3f3db215de3ce3c3c7/internal/mcp/client.go#L170-L173 +#[allow(clippy::ptr_arg)] +fn fix_dockerhub_mcp_registry_gateway(input: &String) -> &str { + if input.contains("{{rust-mcp-filesystem.allowed_directories|volume-target|into}}") { + "." 
+ } else { + input + } +} + +impl FileSystemService { + pub fn try_new(allowed_directories: &[String]) -> ServiceResult { + let normalized_dirs: Vec = allowed_directories + .iter() + .map(fix_dockerhub_mcp_registry_gateway) + .map_while(|dir| { + let expand_result = expand_home(dir.into()); + if !expand_result.is_dir() { + panic!("{}", format!("Error: {dir} is not a directory")); + } + Some(expand_result) + }) + .collect(); + + Ok(Self { + allowed_path: RwLock::new(Arc::new(normalized_dirs)), + }) + } + + pub async fn allowed_directories(&self) -> Arc> { + let guard = self.allowed_path.read().await; + guard.clone() + } +} + +impl FileSystemService { + pub fn valid_roots(&self, roots: Vec<&str>) -> ServiceResult<(Vec, Option)> { + let paths: Vec> = roots + .iter() + .map(|p| self.parse_file_path(p)) + .collect::>(); + + // Partition into Ok and Err results + let (ok_paths, err_paths): (PathResultList, PathResultList) = + paths.into_iter().partition(|p| p.is_ok()); + + // using HashSet to remove duplicates + let (valid_roots, no_dir_roots): (HashSet, HashSet) = ok_paths + .into_iter() + .collect::, _>>()? + .into_iter() + .map(expand_home) + .partition(|path| path.is_dir()); + + let skipped_roots = if !err_paths.is_empty() || !no_dir_roots.is_empty() { + Some(format!( + "Warning: skipped {} invalid roots.", + err_paths.len() + no_dir_roots.len() + )) + } else { + None + }; + + let valid_roots = valid_roots.into_iter().collect(); + + Ok((valid_roots, skipped_roots)) + } + + pub async fn update_allowed_paths(&self, valid_roots: Vec) { + let mut guard = self.allowed_path.write().await; + *guard = Arc::new(valid_roots) + } + + /// Converts a string to a `PathBuf`, supporting both raw paths and `file://` URIs. + fn parse_file_path(&self, input: &str) -> ServiceResult { + Ok(PathBuf::from( + input.strip_prefix("file://").unwrap_or(input).trim(), + )) + } + + pub fn validate_path( + &self, + requested_path: &Path, + allowed_directories: Arc>, + ) -> ServiceResult { + if allowed_directories.is_empty() { + return Err(ServiceError::FromString( + "Allowed directories list is empty. Client did not provide any valid root directories.".to_string() + )); + } + + // Expand ~ to home directory + let expanded_path = expand_home(requested_path.to_path_buf()); + + // Resolve the absolute path + let absolute_path = if expanded_path.as_path().is_absolute() { + expanded_path.clone() + } else { + env::current_dir().unwrap().join(&expanded_path) + }; + + // Normalize the path + let normalized_requested = normalize_path(&absolute_path); + + // Check if path is within allowed directories + if !allowed_directories.iter().any(|dir| { + // Must account for both scenarios — the requested path may not exist yet, making canonicalization impossible. + normalized_requested.starts_with(dir) + || normalized_requested.starts_with(normalize_path(dir)) + }) { + let symlink_target = if contains_symlink(&absolute_path)? 
{ + "a symlink target path" + } else { + "path" + }; + return Err(ServiceError::FromString(format!( + "Access denied - {} is outside allowed directories: {} not in {}", + symlink_target, + absolute_path.display(), + allowed_directories + .iter() + .map(|p| p.display().to_string()) + .collect::>() + .join(",\n"), + ))); + } + + Ok(absolute_path) + } + + // Get file stats + pub async fn get_file_stats(&self, file_path: &Path) -> ServiceResult { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + + let metadata = fs::metadata(valid_path)?; + + let size = metadata.len(); + let created = metadata.created().ok(); + let modified = metadata.modified().ok(); + let accessed = metadata.accessed().ok(); + let is_directory = metadata.is_dir(); + let is_file = metadata.is_file(); + + Ok(FileInfo { + size, + created, + modified, + accessed, + is_directory, + is_file, + metadata, + }) + } + + fn detect_line_ending(&self, text: &str) -> &str { + if text.contains("\r\n") { + "\r\n" + } else if text.contains('\r') { + "\r" + } else { + "\n" + } + } + + pub async fn zip_directory( + &self, + input_dir: String, + pattern: String, + target_zip_file: String, + ) -> ServiceResult { + let allowed_directories = self.allowed_directories().await; + let valid_dir_path = + self.validate_path(Path::new(&input_dir), allowed_directories.clone())?; + + let input_dir_str = &valid_dir_path + .as_os_str() + .to_str() + .ok_or(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Invalid UTF-8 in file name", + ))?; + + let target_path = + self.validate_path(Path::new(&target_zip_file), allowed_directories.clone())?; + + if target_path.exists() { + return Err(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + format!("'{target_zip_file}' already exists!"), + ) + .into()); + } + + let updated_pattern = if pattern.contains('*') { + pattern.to_lowercase() + } else { + format!("*{}*", &pattern.to_lowercase()) + }; + + let glob_pattern = &updated_pattern; + + let entries: Vec<_> = WalkDir::new(&valid_dir_path) + .follow_links(true) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| { + let full_path = entry.path(); + + self.validate_path(full_path, allowed_directories.clone()) + .ok() + .and_then(|path| { + if path != valid_dir_path + && glob_match(glob_pattern, path.display().to_string().as_ref()) + { + Some(path) + } else { + None + } + }) + }) + .collect(); + + let zip_file = File::create(&target_path).await?; + let mut zip_writer = ZipFileWriter::new(zip_file.compat()); + + for entry_path_buf in &entries { + if entry_path_buf.is_dir() { + continue; + } + let entry_path = entry_path_buf.as_path(); + let entry_str = entry_path.as_os_str().to_str().ok_or(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Invalid UTF-8 in file name", + ))?; + + if !entry_str.starts_with(input_dir_str) { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Entry file path does not start with base input directory path.", + ) + .into()); + } + + let entry_str = &entry_str[input_dir_str.len() + 1..]; + write_zip_entry(entry_str, entry_path, &mut zip_writer).await?; + } + + let z_file = zip_writer.close().await?; + let zip_file_size = if let Ok(meta_data) = z_file.into_inner().metadata().await { + format_bytes(meta_data.len()) + } else { + "unknown".to_string() + }; + let result_message = format!( + "Successfully compressed '{}' directory into '{}' ({}).", + input_dir, + target_path.display(), + zip_file_size 
+ ); + Ok(result_message) + } + + pub async fn zip_files( + &self, + input_files: Vec, + target_zip_file: String, + ) -> ServiceResult { + let file_count = input_files.len(); + + if file_count == 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "No file(s) to zip. The input files array is empty.", + ) + .into()); + } + let allowed_directories = self.allowed_directories().await; + let target_path = + self.validate_path(Path::new(&target_zip_file), allowed_directories.clone())?; + + if target_path.exists() { + return Err(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + format!("'{target_zip_file}' already exists!"), + ) + .into()); + } + + let source_paths = input_files + .iter() + .map(|p| self.validate_path(Path::new(p), allowed_directories.clone())) + .collect::, _>>()?; + + let zip_file = File::create(&target_path).await?; + let mut zip_writer = ZipFileWriter::new(zip_file.compat()); + for path in source_paths { + let filename = path.file_name().ok_or(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Invalid path!", + ))?; + + let filename = filename.to_str().ok_or(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Invalid UTF-8 in file name", + ))?; + + write_zip_entry(filename, &path, &mut zip_writer).await?; + } + let z_file = zip_writer.close().await?; + + let zip_file_size = if let Ok(meta_data) = z_file.into_inner().metadata().await { + format_bytes(meta_data.len()) + } else { + "unknown".to_string() + }; + + let result_message = format!( + "Successfully compressed {} {} into '{}' ({}).", + file_count, + if file_count == 1 { "file" } else { "files" }, + target_path.display(), + zip_file_size + ); + Ok(result_message) + } + + pub async fn unzip_file(&self, zip_file: &str, target_dir: &str) -> ServiceResult { + let allowed_directories = self.allowed_directories().await; + + let zip_file = self.validate_path(Path::new(&zip_file), allowed_directories.clone())?; + let target_dir_path = self.validate_path(Path::new(target_dir), allowed_directories)?; + if !zip_file.exists() { + return Err(std::io::Error::new( + std::io::ErrorKind::NotFound, + "Zip file does not exists.", + ) + .into()); + } + + if target_dir_path.exists() { + return Err(std::io::Error::new( + std::io::ErrorKind::AlreadyExists, + format!("'{target_dir}' directory already exists!"), + ) + .into()); + } + + let file = BufReader::new(File::open(zip_file).await?); + let mut zip = ZipFileReader::with_tokio(file).await?; + + let file_count = zip.file().entries().len(); + + for index in 0..file_count { + let entry = zip.file().entries().get(index).unwrap(); + let entry_path = target_dir_path.join(entry.filename().as_str()?); + // Ensure the parent directory exists + if let Some(parent) = entry_path.parent() { + tokio::fs::create_dir_all(parent).await?; + } + + // Extract the file + let reader = zip.reader_without_entry(index).await?; + let mut compat_reader = reader.compat(); + let mut output_file = File::create(&entry_path).await?; + + tokio::io::copy(&mut compat_reader, &mut output_file).await?; + output_file.flush().await?; + } + + let result_message = format!( + "Successfully extracted {} {} into '{}'.", + file_count, + if file_count == 1 { "file" } else { "files" }, + target_dir_path.display() + ); + + Ok(result_message) + } + + pub fn mime_from_path(&self, path: &Path) -> ServiceResult { + let is_svg = path + .extension() + .is_some_and(|e| e.to_str().is_some_and(|s| s == "svg")); + // consider it is a svg file as we cannot detect svg from bytes pattern + if is_svg { 
+ return Ok(infer::Type::new( + infer::MatcherType::Image, + "image/svg+xml", + "svg", + |_: &[u8]| true, + )); + + // infer::Type::new(infer::MatcherType::Image, "", "svg",); + } + let kind = infer::get_from_path(path)?.ok_or(ServiceError::FromString( + "File tyle is unknown!".to_string(), + ))?; + Ok(kind) + } + + pub fn filesize_in_range( + &self, + file_size: u64, + min_bytes: Option, + max_bytes: Option, + ) -> bool { + if min_bytes.is_none() && max_bytes.is_none() { + return true; + } + match (min_bytes, max_bytes) { + (_, Some(max)) if file_size > max => false, + (Some(min), _) if file_size < min => false, + _ => true, + } + } + + pub async fn validate_file_size>( + &self, + path: P, + min_bytes: Option, + max_bytes: Option, + ) -> ServiceResult<()> { + if min_bytes.is_none() && max_bytes.is_none() { + return Ok(()); + } + + let file_size = metadata(&path).await?.len() as usize; + + match (min_bytes, max_bytes) { + (_, Some(max)) if file_size > max => Err(ServiceError::FileTooLarge(max)), + (Some(min), _) if file_size < min => Err(ServiceError::FileTooSmall(min)), + _ => Ok(()), + } + } + + pub async fn read_media_files( + &self, + paths: Vec, + max_bytes: Option, + ) -> ServiceResult> { + let results = stream::iter(paths) + .map(|path| async { + self.read_media_file(Path::new(&path), max_bytes) + .await + .map_err(|e| (path, e)) + }) + .buffer_unordered(MAX_CONCURRENT_FILE_READ) // Process up to MAX_CONCURRENT_FILE_READ files concurrently + .filter_map(|result| async move { result.ok() }) + .collect::>() + .await; + Ok(results) + } + + pub async fn read_media_file( + &self, + file_path: &Path, + max_bytes: Option, + ) -> ServiceResult<(infer::Type, String)> { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + self.validate_file_size(&valid_path, None, max_bytes) + .await?; + let kind = self.mime_from_path(&valid_path)?; + let content = self.read_file_as_base64(&valid_path).await?; + Ok((kind, content)) + } + + // reads file as base64 efficiently in a streaming manner + async fn read_file_as_base64(&self, file_path: &Path) -> ServiceResult { + let file = File::open(file_path).await?; + let mut reader = BufReader::new(file); + + let mut output = Vec::new(); + { + // Wrap output Vec in a Base64 encoder writer + let mut encoder = EncoderWriter::new(&mut output, &general_purpose::STANDARD); + + let mut buffer = [0u8; 8192]; + loop { + let n = reader.read(&mut buffer).await?; + if n == 0 { + break; + } + // Write raw bytes to the Base64 encoder + encoder.write_all(&buffer[..n])?; + } + // Make sure to flush any remaining bytes + encoder.flush()?; + } // drop encoder before consuming output + + // Convert the Base64 bytes to String (safe UTF-8) + let base64_string = + String::from_utf8(output).map_err(|err| ServiceError::FromString(format!("{err}")))?; + Ok(base64_string) + } + + pub async fn read_text_file(&self, file_path: &Path, with_line_numbers: bool) -> ServiceResult { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + let content = tokio::fs::read_to_string(valid_path).await?; + + if with_line_numbers { + Ok(content + .lines() + .enumerate() + .map(|(i, line)| format!("{:>6} | {}", i + 1, line)) + .collect::>() + .join("\n")) + } else { + Ok(content) + } + } + + pub async fn create_directory(&self, file_path: &Path) -> ServiceResult<()> { + let allowed_directories = self.allowed_directories().await; + let 
valid_path = self.validate_path(file_path, allowed_directories)?; + tokio::fs::create_dir_all(valid_path).await?; + Ok(()) + } + + pub async fn move_file(&self, src_path: &Path, dest_path: &Path) -> ServiceResult<()> { + let allowed_directories = self.allowed_directories().await; + let valid_src_path = self.validate_path(src_path, allowed_directories.clone())?; + let valid_dest_path = self.validate_path(dest_path, allowed_directories)?; + tokio::fs::rename(valid_src_path, valid_dest_path).await?; + Ok(()) + } + + pub async fn list_directory(&self, dir_path: &Path) -> ServiceResult> { + let allowed_directories = self.allowed_directories().await; + + let valid_path = self.validate_path(dir_path, allowed_directories)?; + + let mut dir = tokio::fs::read_dir(valid_path).await?; + + let mut entries = Vec::new(); + + // Use a loop to collect the directory entries + while let Some(entry) = dir.next_entry().await? { + entries.push(entry); + } + + Ok(entries) + } + + pub async fn write_file(&self, file_path: &Path, content: &String) -> ServiceResult<()> { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + tokio::fs::write(valid_path, content).await?; + Ok(()) + } + + /// Searches for files in the directory tree starting at `root_path` that match the given `pattern`, + /// excluding paths that match any of the `exclude_patterns`. + /// + /// # Arguments + /// * `root_path` - The root directory to start the search from. + /// * `pattern` - A glob pattern to match file names (case-insensitive). If no wildcards are provided, + /// the pattern is wrapped in '*' for partial matching. + /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive). + /// + /// # Returns + /// A `ServiceResult` containing a vector of`walkdir::DirEntry` objects for matching files, + /// or a `ServiceError` if an error occurs. + pub async fn search_files( + &self, + root_path: &Path, + pattern: String, + exclude_patterns: Vec, + min_bytes: Option, + max_bytes: Option, + ) -> ServiceResult> { + let result = self + .search_files_iter(root_path, pattern, exclude_patterns, min_bytes, max_bytes) + .await?; + Ok(result.collect::>()) + } + + /// Returns an iterator over files in the directory tree starting at `root_path` that match + /// the given `pattern`, excluding paths that match any of the `exclude_patterns`. + /// + /// # Arguments + /// * `root_path` - The root directory to start the search from. + /// * `pattern` - A glob pattern to match file names. If no wildcards are provided, the pattern is wrapped in `**/*{pattern}*` for partial matching. + /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive). + /// + /// # Returns + /// A `ServiceResult` containing an iterator yielding `walkdir::DirEntry` objects for matching files, + /// or a `ServiceError` if an error occurs. 
+ pub async fn search_files_iter<'a>( + &'a self, + // root_path: impl Into, + root_path: &'a Path, + pattern: String, + exclude_patterns: Vec, + min_bytes: Option, + max_bytes: Option, + ) -> ServiceResult + 'a> { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(root_path, allowed_directories.clone())?; + + let updated_pattern = if pattern.contains('*') { + pattern.to_lowercase() + } else { + format!("**/*{}*", &pattern.to_lowercase()) + }; + let glob_pattern = updated_pattern; + + let result = WalkDir::new(valid_path) + .follow_links(true) + .into_iter() + .filter_entry(move |dir_entry| { + let full_path = dir_entry.path(); + + // Validate each path before processing + let validated_path = self + .validate_path(full_path, allowed_directories.clone()) + .ok(); + + if validated_path.is_none() { + // Skip invalid paths during search + return false; + } + + // Get the relative path from the root_path + let relative_path = full_path.strip_prefix(root_path).unwrap_or(full_path); + + let mut should_exclude = exclude_patterns.iter().any(|pattern| { + let glob_pattern = if pattern.contains('*') { + pattern.strip_prefix("/").unwrap_or(pattern).to_owned() + } else { + format!("*{pattern}*") + }; + + glob_match(&glob_pattern, relative_path.to_str().unwrap_or("")) + }); + + // enforce min/max bytes + if !should_exclude && (min_bytes.is_none() || max_bytes.is_none()) { + match dir_entry.metadata().ok() { + Some(metadata) => { + if !self.filesize_in_range(metadata.len(), min_bytes, max_bytes) { + should_exclude = true; + } + } + None => { + should_exclude = true; + } + } + } + + !should_exclude + }) + .filter_map(|v| v.ok()) + .filter(move |entry| { + if root_path == entry.path() { + return false; + } + + glob_match( + &glob_pattern, + &entry.file_name().to_str().unwrap_or("").to_lowercase(), + ) + }); + + Ok(result) + } + + /// Generates a JSON representation of a directory tree starting at the given path. + /// + /// This function recursively builds a JSON array object representing the directory structure, + /// where each entry includes a `name` (file or directory name), `type` ("file" or "directory"), + /// and for directories, a `children` array containing their contents. Files do not have a + /// `children` field. + /// + /// The function supports optional constraints to limit the tree size: + /// - `max_depth`: Limits the depth of directory traversal. + /// - `max_files`: Limits the total number of entries (files and directories). + /// + /// # IMPORTANT NOTE + /// + /// use max_depth or max_files could lead to partial or skewed representations of actual directory tree + pub fn directory_tree>( + &self, + root_path: P, + max_depth: Option, + max_files: Option, + current_count: &mut usize, + allowed_directories: Arc>, + ) -> ServiceResult<(Value, bool)> { + let valid_path = self.validate_path(root_path.as_ref(), allowed_directories.clone())?; + + let metadata = fs::metadata(&valid_path)?; + if !metadata.is_dir() { + return Err(ServiceError::FromString( + "Root path must be a directory".into(), + )); + } + + let mut children = Vec::new(); + let mut reached_max_depth = false; + + if max_depth != Some(0) { + for entry in WalkDir::new(valid_path) + .min_depth(1) + .max_depth(1) + .follow_links(true) + .into_iter() + .filter_map(|e| e.ok()) + { + let child_path = entry.path(); + let metadata = fs::metadata(child_path)?; + + let entry_name = child_path + .file_name() + .ok_or(ServiceError::FromString("Invalid path".to_string()))? 
+ .to_string_lossy() + .into_owned(); + + // Increment the count for this entry + *current_count += 1; + + // Check if we've exceeded max_files (if set) + if let Some(max) = max_files { + if *current_count > max { + continue; // Skip this entry but continue processing others + } + } + + let mut json_entry = json!({ + "name": entry_name, + "type": if metadata.is_dir() { "directory" } else { "file" } + }); + + if metadata.is_dir() { + let next_depth = max_depth.map(|d| d - 1); + let (child_children, child_reached_max_depth) = self.directory_tree( + child_path, + next_depth, + max_files, + current_count, + allowed_directories.clone(), + )?; + json_entry + .as_object_mut() + .unwrap() + .insert("children".to_string(), child_children); + reached_max_depth |= child_reached_max_depth; + } + children.push(json_entry); + } + } else { + // If max_depth is 0, we skip processing this directory's children + reached_max_depth = true; + } + Ok((Value::Array(children), reached_max_depth)) + } + + pub fn create_unified_diff( + &self, + original_content: &str, + new_content: &str, + filepath: Option, + ) -> String { + // Ensure consistent line endings for diff + let normalized_original = normalize_line_endings(original_content); + let normalized_new = normalize_line_endings(new_content); + + // // Generate the diff using TextDiff + let diff = TextDiff::from_lines(&normalized_original, &normalized_new); + + let file_name = filepath.unwrap_or("file".to_string()); + // Format the diff as a unified diff + let patch = diff + .unified_diff() + .header( + format!("{file_name}\toriginal").as_str(), + format!("{file_name}\tmodified").as_str(), + ) + .context_radius(4) + .to_string(); + + format!("Index: {}\n{}\n{}", file_name, "=".repeat(68), patch) + } + + pub async fn apply_file_edits( + &self, + file_path: &Path, + edits: Vec, + dry_run: Option, + save_to: Option<&Path>, + ) -> ServiceResult { + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + + // Read file content and normalize line endings + let content_str = tokio::fs::read_to_string(&valid_path).await?; + let original_line_ending = self.detect_line_ending(&content_str); + let content_str = normalize_line_endings(&content_str); + + // Apply edits sequentially + let mut modified_content = content_str.clone(); + + for edit in edits { + let normalized_old = normalize_line_endings(&edit.old_text); + let normalized_new = normalize_line_endings(&edit.new_text); + // If exact match exists, use it + if modified_content.contains(&normalized_old) { + modified_content = modified_content.replacen(&normalized_old, &normalized_new, 1); + continue; + } + + // Otherwise, try line-by-line matching with flexibility for whitespace + let old_lines: Vec = normalized_old + .trim_end() + .split('\n') + .map(|s| s.to_string()) + .collect(); + + let content_lines: Vec = modified_content + .trim_end() + .split('\n') + .map(|s| s.to_string()) + .collect(); + + let mut match_found = false; + + // skip when the match is impossible: + if old_lines.len() > content_lines.len() { + let error_message = format!( + "Cannot apply edit: the original text spans more lines ({}) than the file content ({}).", + old_lines.len(), + content_lines.len() + ); + + return Err(RpcError::internal_error() + .with_message(error_message) + .into()); + } + + let max_start = content_lines.len().saturating_sub(old_lines.len()); + for i in 0..=max_start { + let potential_match = &content_lines[i..i + old_lines.len()]; + + // Compare 
lines with normalized whitespace + let is_match = old_lines.iter().enumerate().all(|(j, old_line)| { + let content_line = &potential_match[j]; + old_line.trim() == content_line.trim() + }); + + if is_match { + // Preserve original indentation of first line + let original_indent = content_lines[i] + .chars() + .take_while(|&c| c.is_whitespace()) + .collect::(); + + let new_lines: Vec = normalized_new + .split('\n') + .enumerate() + .map(|(j, line)| { + // Keep indentation of the first line + if j == 0 { + return format!("{}{}", original_indent, line.trim_start()); + } + + // For subsequent lines, preserve relative indentation and original whitespace type + let old_indent = old_lines + .get(j) + .map(|line| { + line.chars() + .take_while(|&c| c.is_whitespace()) + .collect::() + }) + .unwrap_or_default(); + + let new_indent = line + .chars() + .take_while(|&c| c.is_whitespace()) + .collect::(); + + // Use the same whitespace character as original_indent (tabs or spaces) + let indent_char = if original_indent.contains('\t') { + "\t" + } else { + " " + }; + let relative_indent = if new_indent.len() >= old_indent.len() { + new_indent.len() - old_indent.len() + } else { + 0 // Don't reduce indentation below original + }; + format!( + "{}{}{}", + &original_indent, + &indent_char.repeat(relative_indent), + line.trim_start() + ) + }) + .collect(); + + let mut content_lines = content_lines.clone(); + content_lines.splice(i..i + old_lines.len(), new_lines); + modified_content = content_lines.join("\n"); + match_found = true; + break; + } + } + if !match_found { + return Err(RpcError::internal_error() + .with_message(format!( + "Could not find exact match for edit:\n{}", + edit.old_text + )) + .into()); + } + } + + let diff = self.create_unified_diff( + &content_str, + &modified_content, + Some(valid_path.display().to_string()), + ); + + // Format diff with appropriate number of backticks + let mut num_backticks = 3; + while diff.contains(&"`".repeat(num_backticks)) { + num_backticks += 1; + } + let formatted_diff = format!( + "{}diff\n{}{}\n\n", + "`".repeat(num_backticks), + diff, + "`".repeat(num_backticks) + ); + + let is_dry_run = dry_run.unwrap_or(false); + + if !is_dry_run { + let target = save_to.unwrap_or(valid_path.as_path()); + let modified_content = modified_content.replace("\n", original_line_ending); + tokio::fs::write(target, modified_content).await?; + } + + Ok(formatted_diff) + } + + pub fn escape_regex(&self, text: &str) -> String { + // Covers special characters in regex engines (RE2, PCRE, JS, Python) + const SPECIAL_CHARS: &[char] = &[ + '.', '^', '$', '*', '+', '?', '(', ')', '[', ']', '{', '}', '\\', '|', '/', + ]; + + let mut escaped = String::with_capacity(text.len()); + + for ch in text.chars() { + if SPECIAL_CHARS.contains(&ch) { + escaped.push('\\'); + } + escaped.push(ch); + } + + escaped + } + + // Searches the content of a file for occurrences of the given query string. + /// + /// This method searches the file specified by `file_path` for lines matching the `query`. + /// The search can be performed as a regular expression or as a literal string, + /// depending on the `is_regex` flag. + /// + /// If matched line is larger than 255 characters, a snippet will be extracted around the matched text. 
+ /// + pub fn content_search( + &self, + query: &str, + file_path: impl AsRef, + is_regex: Option, + ) -> ServiceResult> { + let query = if is_regex.unwrap_or_default() { + query.to_string() + } else { + self.escape_regex(query) + }; + + let matcher = RegexMatcherBuilder::new() + .case_insensitive(true) + .build(query.as_str())?; + + let mut searcher = Searcher::new(); + let mut result = FileSearchResult { + file_path: file_path.as_ref().to_path_buf(), + matches: vec![], + }; + + searcher.set_binary_detection(BinaryDetection::quit(b'\x00')); + + searcher.search_path( + &matcher, + file_path, + UTF8(|line_number, line| { + let actual_match = matcher.find(line.as_bytes())?.unwrap(); + + result.matches.push(ContentMatchResult { + line_number, + start_pos: actual_match.start(), + line_text: self.extract_snippet(line, actual_match, None, None), + }); + Ok(true) + }), + )?; + + if result.matches.is_empty() { + return Ok(None); + } + + Ok(Some(result)) + } + + /// Extracts a snippet from a given line of text around a match. + /// + /// It extracts a substring starting a fixed number of characters (`SNIPPET_BACKWARD_CHARS`) + /// before the start position of the `match`, and extends up to `max_length` characters + /// If the snippet does not include the beginning or end of the original line, ellipses (`"..."`) are added + /// to indicate the truncation. + pub fn extract_snippet( + &self, + line: &str, + match_result: Match, + max_length: Option, + backward_chars: Option, + ) -> String { + let max_length = max_length.unwrap_or(SNIPPET_MAX_LENGTH); + let backward_chars = backward_chars.unwrap_or(SNIPPET_BACKWARD_CHARS); + + // Calculate the number of leading whitespace bytes to adjust for trimmed input + let start_pos = line.len() - line.trim_start().len(); + // Trim leading and trailing whitespace from the input line + let line = line.trim(); + + // Calculate the desired start byte index by adjusting match start for trimming and backward chars + // match_result.start() is the byte index in the original string + // Subtract start_pos to account for trimmed whitespace and backward_chars to include context before the match + let desired_start = (match_result.start() - start_pos).saturating_sub(backward_chars); + + // Find the nearest valid UTF-8 character boundary at or after desired_start + // Prevents "byte index is not a char boundary" panic by ensuring the slice starts at a valid character (issue #37) + let snippet_start = line + .char_indices() + .map(|(i, _)| i) + .find(|&i| i >= desired_start) + .unwrap_or(desired_start.min(line.len())); + // Initialize a counter for tracking characters to respect max_length + let mut char_count = 0; + + // Calculate the desired end byte index by counting max_length characters from snippet_start + // Take max_length + 1 to find the boundary after the last desired character + let desired_end = line[snippet_start..] 
+ .char_indices() + .take(max_length + 1) + .find(|&(_, _)| { + char_count += 1; + char_count > max_length + }) + .map(|(i, _)| snippet_start + i) + .unwrap_or(line.len()); + + // Ensure snippet_end is a valid UTF-8 character boundary at or after desired_end + // This prevents slicing issues with multi-byte characters + let snippet_end = line + .char_indices() + .map(|(i, _)| i) + .find(|&i| i >= desired_end) + .unwrap_or(line.len()); + + // Cap snippet_end to avoid exceeding the string length + let snippet_end = snippet_end.min(line.len()); + + // Extract the snippet from the trimmed line using the calculated byte indices + let snippet = &line[snippet_start..snippet_end]; + + let mut result = String::new(); + // Add leading ellipsis if the snippet doesn't start at the beginning of the trimmed line + if snippet_start > 0 { + result.push_str("..."); + } + + result.push_str(snippet); + + // Add trailing ellipsis if the snippet doesn't reach the end of the trimmed line + if snippet_end < line.len() { + result.push_str("..."); + } + result + } + + #[allow(clippy::too_many_arguments)] + pub async fn search_files_content( + &self, + root_path: impl AsRef, + pattern: &str, + query: &str, + is_regex: bool, + exclude_patterns: Option>, + min_bytes: Option, + max_bytes: Option, + ) -> ServiceResult> { + let files_iter = self + .search_files_iter( + root_path.as_ref(), + pattern.to_string(), + exclude_patterns.to_owned().unwrap_or_default(), + min_bytes, + max_bytes, + ) + .await?; + + let results: Vec = files_iter + .filter_map(|entry| { + self.content_search(query, entry.path(), Some(is_regex)) + .ok() + .and_then(|v| v) + }) + .collect(); + Ok(results) + } + + /// Reads the first n lines from a text file, preserving line endings. + /// Args: + /// file_path: Path to the file + /// n: Number of lines to read + /// Returns a String containing the first n lines with original line endings or an error if the path is invalid or file cannot be read. + pub async fn head_file(&self, file_path: &Path, n: usize) -> ServiceResult { + // Validate file path against allowed directories + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + + // Open file asynchronously and create a BufReader + let file = File::open(&valid_path).await?; + let mut reader = BufReader::new(file); + let mut result = String::with_capacity(n * 100); // Estimate capacity (avg 100 bytes/line) + let mut count = 0; + + // Read lines asynchronously, preserving line endings + let mut line = Vec::new(); + while count < n { + line.clear(); + let bytes_read = reader.read_until(b'\n', &mut line).await?; + if bytes_read == 0 { + break; // Reached EOF + } + result.push_str(&String::from_utf8_lossy(&line)); + count += 1; + } + + Ok(result) + } + + /// Reads the last n lines from a text file, preserving line endings. + /// Args: + /// file_path: Path to the file + /// n: Number of lines to read + /// Returns a String containing the last n lines with original line endings or an error if the path is invalid or file cannot be read. 
+ pub async fn tail_file(&self, file_path: &Path, n: usize) -> ServiceResult { + // Validate file path against allowed directories + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(file_path, allowed_directories)?; + + // Open file asynchronously + let file = File::open(&valid_path).await?; + let file_size = file.metadata().await?.len(); + + // If file is empty or n is 0, return empty string + if file_size == 0 || n == 0 { + return Ok(String::new()); + } + + // Create a BufReader + let mut reader = BufReader::new(file); + let mut line_count = 0; + let mut pos = file_size; + let chunk_size = 8192; // 8KB chunks + let mut buffer = vec![0u8; chunk_size]; + let mut newline_positions = Vec::new(); + + // Read backwards to collect all newline positions + while pos > 0 { + let read_size = chunk_size.min(pos as usize); + pos -= read_size as u64; + reader.seek(SeekFrom::Start(pos)).await?; + let read_bytes = reader.read_exact(&mut buffer[..read_size]).await?; + + // Process chunk in reverse to find newlines + for (i, byte) in buffer[..read_bytes].iter().enumerate().rev() { + if *byte == b'\n' { + newline_positions.push(pos + i as u64); + line_count += 1; + } + } + } + + // Check if file ends with a non-newline character (partial last line) + if file_size > 0 { + let mut temp_reader = BufReader::new(File::open(&valid_path).await?); + temp_reader.seek(SeekFrom::End(-1)).await?; + let mut last_byte = [0u8; 1]; + temp_reader.read_exact(&mut last_byte).await?; + if last_byte[0] != b'\n' { + line_count += 1; + } + } + + // Determine start position for reading the last n lines + let start_pos = if line_count <= n { + 0 // Read from start if fewer than n lines + } else { + *newline_positions.get(line_count - n).unwrap_or(&0) + 1 + }; + + // Read forward from start_pos + reader.seek(SeekFrom::Start(start_pos)).await?; + let mut result = String::with_capacity(n * 100); // Estimate capacity + let mut line = Vec::new(); + let mut lines_read = 0; + + while lines_read < n { + line.clear(); + let bytes_read = reader.read_until(b'\n', &mut line).await?; + if bytes_read == 0 { + // Handle partial last line at EOF + if !line.is_empty() { + result.push_str(&String::from_utf8_lossy(&line)); + } + break; + } + result.push_str(&String::from_utf8_lossy(&line)); + lines_read += 1; + } + + Ok(result) + } + + /// Reads lines from a text file starting at the specified offset (0-based), preserving line endings. + /// Args: + /// path: Path to the file + /// offset: Number of lines to skip (0-based) + /// limit: Optional maximum number of lines to read + /// Returns a String containing the selected lines with original line endings or an error if the path is invalid or file cannot be read. + pub async fn read_file_lines( + &self, + path: &Path, + offset: usize, + limit: Option, + ) -> ServiceResult { + // Validate file path against allowed directories + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(path, allowed_directories)?; + + // Open file and get metadata before moving into BufReader + let file = File::open(&valid_path).await?; + let file_size = file.metadata().await?.len(); + let mut reader = BufReader::new(file); + + // If file is empty or limit is 0, return empty string + if file_size == 0 || limit == Some(0) { + return Ok(String::new()); + } + + // Skip offset lines (0-based indexing) + let mut buffer = Vec::new(); + for _ in 0..offset { + buffer.clear(); + if reader.read_until(b'\n', &mut buffer).await? 
== 0 { + return Ok(String::new()); // EOF before offset + } + } + + // Read lines up to limit (or all remaining if limit is None) + let mut result = String::with_capacity(limit.unwrap_or(100) * 100); // Estimate capacity + match limit { + Some(max_lines) => { + for _ in 0..max_lines { + buffer.clear(); + let bytes_read = reader.read_until(b'\n', &mut buffer).await?; + if bytes_read == 0 { + break; // Reached EOF + } + result.push_str(&String::from_utf8_lossy(&buffer)); + } + } + None => { + loop { + buffer.clear(); + let bytes_read = reader.read_until(b'\n', &mut buffer).await?; + if bytes_read == 0 { + break; // Reached EOF + } + result.push_str(&String::from_utf8_lossy(&buffer)); + } + } + } + + Ok(result) + } + + /// Calculates the total size (in bytes) of all files within a directory tree. + /// + /// This function recursively searches the specified `root_path` for files, + /// filters out directories and non-file entries, and sums the sizes of all found files. + /// The size calculation is parallelized using Rayon for improved performance on large directories. + /// + /// # Arguments + /// * `root_path` - The root directory path to start the size calculation. + /// + /// # Returns + /// Returns a `ServiceResult` containing the total size in bytes of all files under the `root_path`. + /// + /// # Notes + /// - Only files are included in the size calculation; directories and other non-file entries are ignored. + /// - The search pattern is `"**/*"` (all files) and no exclusions are applied. + /// - Parallel iteration is used to speed up the metadata fetching and summation. + pub async fn calculate_directory_size(&self, root_path: &Path) -> ServiceResult { + let entries = self + .search_files_iter(root_path, "**/*".to_string(), vec![], None, None) + .await? + .filter(|e| e.file_type().is_file()); // Only process files + + // Use rayon to parallelize size summation + let total_size: u64 = entries + .par_bridge() // Convert to parallel iterator + .filter_map(|entry| entry.metadata().ok().map(|meta| meta.len())) + .sum(); + + Ok(total_size) + } + + /// Recursively finds all empty directories within the given root path. + /// + /// A directory is considered empty if it contains no files in itself or any of its subdirectories + /// except OS metadata files: `.DS_Store` (macOS) and `Thumbs.db` (Windows) + /// Empty subdirectories are allowed. You can optionally provide a list of glob-style patterns in + /// `exclude_patterns` to ignore certain paths during the search (e.g., to skip system folders or hidden directories). + /// + /// # Arguments + /// - `root_path`: The starting directory to search. + /// - `exclude_patterns`: Optional list of glob patterns to exclude from the search. + /// Directories matching these patterns will be ignored. + /// + /// # Errors + /// Returns an error if the root path is invalid or inaccessible. + /// + /// # Returns + /// A list of paths to empty directories, as strings, including parent directories that contain only empty subdirectories. + /// Recursively finds all empty directories within the given root path. + /// + /// A directory is considered empty if it contains no files in itself or any of its subdirectories. + /// Empty subdirectories are allowed. You can optionally provide a list of glob-style patterns in + /// `exclude_patterns` to ignore certain paths during the search (e.g., to skip system folders or hidden directories). + /// + /// # Arguments + /// - `root_path`: The starting directory to search. 
+ /// - `exclude_patterns`: Optional list of glob patterns to exclude from the search. + /// Directories matching these patterns will be ignored. + /// + /// # Errors + /// Returns an error if the root path is invalid or inaccessible. + /// + /// # Returns + /// A list of paths to all empty directories, as strings, including parent directories that contain only empty subdirectories. + pub async fn find_empty_directories( + &self, + root_path: &Path, + exclude_patterns: Option>, + ) -> ServiceResult> { + let walker = self + .search_files_iter( + root_path, + "**/*".to_string(), + exclude_patterns.unwrap_or_default(), + None, + None, + ) + .await? + .filter(|e| e.file_type().is_dir()); // Only directories + + let mut empty_dirs = Vec::new(); + + // Check each directory for emptiness + for entry in walker { + let is_empty = WalkDir::new(entry.path()) + .into_iter() + .filter_map(|e| e.ok()) + .all(|e| !e.file_type().is_file() || is_system_metadata_file(e.file_name())); // Directory is empty if no files are found in it or subdirs, ".DS_Store" will be ignores on Mac + + if is_empty { + if let Some(path_str) = entry.path().to_str() { + empty_dirs.push(path_str.to_string()); + } + } + } + + Ok(empty_dirs) + } + + /// Finds groups of duplicate files within the given root path. + /// Returns a vector of vectors, where each inner vector contains paths to files with identical content. + /// Files are considered duplicates if they have the same size and SHA-256 hash. + pub async fn find_duplicate_files( + &self, + root_path: &Path, + pattern: Option, + exclude_patterns: Option>, + min_bytes: Option, + max_bytes: Option, + ) -> ServiceResult>> { + // Validate root path against allowed directories + let allowed_directories = self.allowed_directories().await; + let valid_path = self.validate_path(root_path, allowed_directories)?; + + // Get Tokio runtime handle + let rt = tokio::runtime::Handle::current(); + + // Step 1: Collect files and group by size + let mut size_map: HashMap> = HashMap::new(); + let entries = self + .search_files_iter( + &valid_path, + pattern.unwrap_or("**/*".to_string()), + exclude_patterns.unwrap_or_default(), + min_bytes, + max_bytes, + ) + .await? 
+ .filter(|e| e.file_type().is_file()); // Only files + + for entry in entries { + if let Ok(metadata) = entry.metadata() { + if let Some(path_str) = entry.path().to_str() { + size_map + .entry(metadata.len()) + .or_default() + .push(path_str.to_string()); + } + } + } + + // Filter out sizes with only one file (no duplicates possible) + let size_groups: Vec> = size_map + .into_iter() + .collect::>() // Collect into Vec to enable parallel iteration + .into_par_iter() + .filter(|(_, paths)| paths.len() > 1) + .map(|(_, paths)| paths) + .collect(); + + // Step 2: Group by quick hash (first 4KB) + let mut quick_hash_map: HashMap, Vec> = HashMap::new(); + for paths in size_groups.into_iter() { + let quick_hashes: Vec<(String, Vec)> = paths + .into_par_iter() + .filter_map(|path| { + let rt = rt.clone(); // Clone the runtime handle for this task + rt.block_on(async { + let file = File::open(&path).await.ok()?; + let mut reader = tokio::io::BufReader::new(file); + let mut buffer = vec![0u8; 4096]; // Read first 4KB + let bytes_read = reader.read(&mut buffer).await.ok()?; + let mut hasher = Sha256::new(); + hasher.update(&buffer[..bytes_read]); + Some((path, hasher.finalize().to_vec())) + }) + }) + .collect(); + + for (path, hash) in quick_hashes { + quick_hash_map.entry(hash).or_default().push(path); + } + } + + // Step 3: Group by full hash for groups with multiple files + let mut full_hash_map: HashMap, Vec> = HashMap::new(); + let filtered_quick_hashes: Vec<(Vec, Vec)> = quick_hash_map + .into_iter() + .collect::>() + .into_par_iter() + .filter(|(_, paths)| paths.len() > 1) + .collect(); + + for (_quick_hash, paths) in filtered_quick_hashes { + let full_hashes: Vec<(String, Vec)> = paths + .into_par_iter() + .filter_map(|path| { + let rt = rt.clone(); // Clone the runtime handle for this task + rt.block_on(async { + let file = File::open(&path).await.ok()?; + let mut reader = tokio::io::BufReader::new(file); + let mut hasher = Sha256::new(); + let mut buffer = vec![0u8; 8192]; // 8KB chunks + loop { + let bytes_read = reader.read(&mut buffer).await.ok()?; + if bytes_read == 0 { + break; + } + hasher.update(&buffer[..bytes_read]); + } + Some((path, hasher.finalize().to_vec())) + }) + }) + .collect(); + + for (path, hash) in full_hashes { + full_hash_map.entry(hash).or_default().push(path); + } + } + + // Collect groups of duplicates (only groups with more than one file) + let duplicates: Vec> = full_hash_map + .into_values() + .filter(|group| group.len() > 1) + .collect(); + + Ok(duplicates) + } +} diff --git a/src/fs_service/io/read.rs b/src/fs_service/io/read.rs index 575d5b9..7296171 100644 --- a/src/fs_service/io/read.rs +++ b/src/fs_service/io/read.rs @@ -20,11 +20,25 @@ use tokio::{ const MAX_CONCURRENT_FILE_READ: usize = 5; impl FileSystemService { - pub async fn read_text_file(&self, file_path: &Path) -> ServiceResult { + pub async fn read_text_file( + &self, + file_path: &Path, + with_line_numbers: bool, + ) -> ServiceResult { let allowed_directories = self.allowed_directories().await; let valid_path = self.validate_path(file_path, allowed_directories)?; let content = tokio::fs::read_to_string(valid_path).await?; - Ok(content) + + if with_line_numbers { + Ok(content + .lines() + .enumerate() + .map(|(i, line)| format!("{:>6} | {}", i + 1, line)) + .collect::>() + .join("\n")) + } else { + Ok(content) + } } /// Reads the first n lines from a text file, preserving line endings. 
diff --git a/src/tools/read_multiple_text_files.rs b/src/tools/read_multiple_text_files.rs index 91923e4..efee983 100644 --- a/src/tools/read_multiple_text_files.rs +++ b/src/tools/read_multiple_text_files.rs @@ -35,7 +35,7 @@ impl ReadMultipleTextFiles { .map(|path| async move { { let content = context - .read_text_file(Path::new(&path)) + .read_text_file(Path::new(&path), false) .await .map_err(CallToolError::new); diff --git a/src/tools/read_text_file.rs b/src/tools/read_text_file.rs index 3872625..1ea83d7 100644 --- a/src/tools/read_text_file.rs +++ b/src/tools/read_text_file.rs @@ -12,7 +12,8 @@ use crate::fs_service::FileSystemService; description = concat!("Read the complete contents of a text file from the file system as text. ", "Handles various text encodings and provides detailed error messages if the ", "file cannot be read. Use this tool when you need to examine the contents of ", - "a single file. Only works within allowed directories."), + "a single file. Optionally include line numbers for precise code targeting. ", + "Only works within allowed directories."), destructive_hint = false, idempotent_hint = false, open_world_hint = false, @@ -22,6 +23,11 @@ use crate::fs_service::FileSystemService; pub struct ReadTextFile { /// The path of the file to read. pub path: String, + /// Optional: Include line numbers in output (default: false). + /// When enabled, each line is prefixed with its line number (1-based). + /// Useful for AI agents that need to target specific lines for code patches. + #[serde(default)] + pub with_line_numbers: Option, } impl ReadTextFile { @@ -30,7 +36,10 @@ impl ReadTextFile { context: &FileSystemService, ) -> std::result::Result { let content = context - .read_text_file(Path::new(¶ms.path)) + .read_text_file( + Path::new(¶ms.path), + params.with_line_numbers.unwrap_or(false), + ) .await .map_err(CallToolError::new)?; diff --git a/tests/test_fs_service.rs b/tests/test_fs_service.rs index df67283..c3985cd 100644 --- a/tests/test_fs_service.rs +++ b/tests/test_fs_service.rs @@ -230,10 +230,86 @@ async fn test_unzip_file_non_existent() { async fn test_read_file() { let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); let file_path = create_temp_file(temp_dir.join("dir1").as_path(), "test.txt", "content"); - let content = service.read_text_file(&file_path).await.unwrap(); + let content = service.read_text_file(&file_path, false).await.unwrap(); assert_eq!(content, "content"); } +#[tokio::test] +async fn test_read_text_file_with_line_numbers() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_temp_file( + temp_dir.join("dir1").as_path(), + "test.txt", + "line1\nline2\nline3" + ); + let content = service.read_text_file(&file_path, true).await.unwrap(); + assert_eq!(content, " 1 | line1\n 2 | line2\n 3 | line3"); +} + +#[tokio::test] +async fn test_read_text_file_without_line_numbers() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_temp_file( + temp_dir.join("dir1").as_path(), + "test.txt", + "line1\nline2\nline3" + ); + let content = service.read_text_file(&file_path, false).await.unwrap(); + assert_eq!(content, "line1\nline2\nline3"); +} + +#[tokio::test] +async fn test_read_text_file_with_line_numbers_empty_file() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_temp_file(temp_dir.join("dir1").as_path(), "empty.txt", ""); + let content = 
service.read_text_file(&file_path, true).await.unwrap(); + assert_eq!(content, ""); +} + +#[tokio::test] +async fn test_read_text_file_with_line_numbers_single_line() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_temp_file(temp_dir.join("dir1").as_path(), "single.txt", "single line"); + let content = service.read_text_file(&file_path, true).await.unwrap(); + assert_eq!(content, " 1 | single line"); +} + +#[tokio::test] +async fn test_read_text_file_with_line_numbers_no_trailing_newline() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + let file_path = create_temp_file( + temp_dir.join("dir1").as_path(), + "no_newline.txt", + "line1\nline2" + ); + let content = service.read_text_file(&file_path, true).await.unwrap(); + assert_eq!(content, " 1 | line1\n 2 | line2"); +} + +#[tokio::test] +async fn test_read_text_file_with_line_numbers_large_file() { + let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); + // Create a file with more than 999 lines to test padding + let mut lines = Vec::new(); + for i in 1..=1000 { + lines.push(format!("line{}", i)); + } + let file_content = lines.join("\n"); + let file_path = create_temp_file( + temp_dir.join("dir1").as_path(), + "large.txt", + &file_content + ); + let content = service.read_text_file(&file_path, true).await.unwrap(); + + // Check first line + assert!(content.starts_with(" 1 | line1\n")); + // Check line 999 + assert!(content.contains(" 999 | line999\n")); + // Check line 1000 (6 digits with right padding) + assert!(content.contains(" 1000 | line1000")); +} + #[tokio::test] async fn test_create_directory() { let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]); From 5131280f1376f404d1106b8205bdeacabfc7e646 Mon Sep 17 00:00:00 2001 From: Abderraouf Belalia Date: Wed, 29 Oct 2025 03:02:09 +0100 Subject: [PATCH 2/5] style: fix formatting and clippy warnings [agent commit] --- src/fs_service.rs | 6 +++++- tests/test_fs_service.rs | 14 +++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/fs_service.rs b/src/fs_service.rs index 153dd52..eb4eee6 100644 --- a/src/fs_service.rs +++ b/src/fs_service.rs @@ -575,7 +575,11 @@ impl FileSystemService { Ok(base64_string) } - pub async fn read_text_file(&self, file_path: &Path, with_line_numbers: bool) -> ServiceResult { + pub async fn read_text_file( + &self, + file_path: &Path, + with_line_numbers: bool, + ) -> ServiceResult { let allowed_directories = self.allowed_directories().await; let valid_path = self.validate_path(file_path, allowed_directories)?; let content = tokio::fs::read_to_string(valid_path).await?; diff --git a/tests/test_fs_service.rs b/tests/test_fs_service.rs index c3985cd..519841f 100644 --- a/tests/test_fs_service.rs +++ b/tests/test_fs_service.rs @@ -240,7 +240,7 @@ async fn test_read_text_file_with_line_numbers() { let file_path = create_temp_file( temp_dir.join("dir1").as_path(), "test.txt", - "line1\nline2\nline3" + "line1\nline2\nline3", ); let content = service.read_text_file(&file_path, true).await.unwrap(); assert_eq!(content, " 1 | line1\n 2 | line2\n 3 | line3"); @@ -252,7 +252,7 @@ async fn test_read_text_file_without_line_numbers() { let file_path = create_temp_file( temp_dir.join("dir1").as_path(), "test.txt", - "line1\nline2\nline3" + "line1\nline2\nline3", ); let content = service.read_text_file(&file_path, false).await.unwrap(); assert_eq!(content, "line1\nline2\nline3"); @@ -280,7 
+280,7 @@ async fn test_read_text_file_with_line_numbers_no_trailing_newline() { let file_path = create_temp_file( temp_dir.join("dir1").as_path(), "no_newline.txt", - "line1\nline2" + "line1\nline2", ); let content = service.read_text_file(&file_path, true).await.unwrap(); assert_eq!(content, " 1 | line1\n 2 | line2"); @@ -292,14 +292,10 @@ async fn test_read_text_file_with_line_numbers_large_file() { // Create a file with more than 999 lines to test padding let mut lines = Vec::new(); for i in 1..=1000 { - lines.push(format!("line{}", i)); + lines.push(format!("line{i}")); } let file_content = lines.join("\n"); - let file_path = create_temp_file( - temp_dir.join("dir1").as_path(), - "large.txt", - &file_content - ); + let file_path = create_temp_file(temp_dir.join("dir1").as_path(), "large.txt", &file_content); let content = service.read_text_file(&file_path, true).await.unwrap(); // Check first line From b3740aabd396d66821ebffedace5b139df2c0e56 Mon Sep 17 00:00:00 2001 From: Abderraouf Belalia Date: Wed, 29 Oct 2025 03:17:05 +0100 Subject: [PATCH 3/5] docs: update capabilities with line numbering parameter [agent commit] --- docs/capabilities.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/capabilities.md b/docs/capabilities.md index ede3c25..afa7862 100644 --- a/docs/capabilities.md +++ b/docs/capabilities.md @@ -230,10 +230,11 @@ read_text_file - Read the complete contents of a text file from the file system as text. Handles various text encodings and provides detailed error messages if the file cannot be read. Use this tool when you need to examine the contents of a single file. Only works within allowed directories. + Read the complete contents of a text file from the file system as text. Handles various text encodings and provides detailed error messages if the file cannot be read. Use this tool when you need to examine the contents of a single file. Optionally include line numbers for precise code targeting. Only works within allowed directories.
   • path : string
+  • with_line_numbers : boolean
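For illustration only (a sketch, not part of the patch series): the `with_line_numbers : boolean` parameter listed above is optional, and `#[serde(default)]` on an `Option<bool>` field is what keeps older clients working — a request that omits the flag deserializes to `None`, which the tool maps to `false`. The struct name `ReadTextFileArgs` below is a stand-in for the real `ReadTextFile` tool struct, and the example assumes `serde` (with its derive feature) and `serde_json` are available, as they are in this crate.

use serde::Deserialize;

// Stand-in for the real `ReadTextFile` tool struct; the field layout mirrors
// the diff in PATCH 1/5, but the name and the explicit derive are assumptions.
#[derive(Deserialize, Debug)]
struct ReadTextFileArgs {
    path: String,
    #[serde(default)]
    with_line_numbers: Option<bool>,
}

fn main() {
    // A legacy call that only sends `path` still deserializes; the flag is None,
    // which the tool turns into `false` via `unwrap_or(false)`.
    let legacy: ReadTextFileArgs = serde_json::from_str(r#"{ "path": "notes.txt" }"#).unwrap();
    assert_eq!(legacy.with_line_numbers, None);

    // An updated client opts in explicitly.
    let numbered: ReadTextFileArgs =
        serde_json::from_str(r#"{ "path": "notes.txt", "with_line_numbers": true }"#).unwrap();
    assert_eq!(numbered.with_line_numbers, Some(true));
    println!("{legacy:?} / {numbered:?}");
}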
From 7300ee3649291096f7aba5ef685cfd63fb896783 Mon Sep 17 00:00:00 2001 From: Abderraouf Belalia Date: Mon, 3 Nov 2025 13:51:36 +0100 Subject: [PATCH 4/5] refactor: address PR review feedback for line numbering feature - Remove manual CHANGELOG.md entry (auto-generated at release) - Revert docs/capabilities.md changes (auto-generated via mcp-discovery) - Improve documentation clarity for with_line_numbers parameter format - Add test for Windows line endings (\r\n) - Add edge case tests for newline-only content Addresses review comments on PR #61 [agent commit] --- CHANGELOG.md | 10 ---------- docs/capabilities.md | 3 +-- src/tools/read_text_file.rs | 4 ++-- tests/test_fs_service.rs | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f974777..d240a27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,15 +1,5 @@ # Changelog -## [Unreleased] - -### 🚀 Features - -* Add optional line numbering to read_text_file tool ([#60](https://github.com/rust-mcp-stack/rust-mcp-filesystem/issues/60)) - - Added `with_line_numbers` optional parameter to `read_text_file` tool - - When enabled, prefixes each line with right-aligned line numbers and pipe separator - - Useful for AI agents that need to target specific lines for code patches - - Maintains backward compatibility with existing usage - ## [0.3.6](https://github.com/rust-mcp-stack/rust-mcp-filesystem/compare/v0.3.5...v0.3.6) (2025-10-15) diff --git a/docs/capabilities.md b/docs/capabilities.md index afa7862..ede3c25 100644 --- a/docs/capabilities.md +++ b/docs/capabilities.md @@ -230,11 +230,10 @@ read_text_file - Read the complete contents of a text file from the file system as text. Handles various text encodings and provides detailed error messages if the file cannot be read. Use this tool when you need to examine the contents of a single file. Optionally include line numbers for precise code targeting. Only works within allowed directories. + Read the complete contents of a text file from the file system as text. Handles various text encodings and provides detailed error messages if the file cannot be read. Use this tool when you need to examine the contents of a single file. Only works within allowed directories.
   • path : string
-  • with_line_numbers : boolean
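For readers skimming the doc-comment wording change in the next diff, here is a self-contained sketch (not part of the patch) of the output format it documents. It mirrors the `format!("{:>6} | {}", i + 1, line)` expression used by `FileSystemService::read_text_file` elsewhere in this series; `number_lines` is a hypothetical helper name used only for this example.

// Line numbers are 1-based and right-aligned to width 6, then " | " and the line text.
fn number_lines(content: &str) -> String {
    content
        .lines()
        .enumerate()
        .map(|(i, line)| format!("{:>6} | {}", i + 1, line))
        .collect::<Vec<_>>()
        .join("\n")
}

fn main() {
    assert_eq!(
        number_lines("line1\nline2"),
        "     1 | line1\n     2 | line2"
    );
}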
diff --git a/src/tools/read_text_file.rs b/src/tools/read_text_file.rs
index 1ea83d7..6817db5 100644
--- a/src/tools/read_text_file.rs
+++ b/src/tools/read_text_file.rs
@@ -24,8 +24,8 @@ pub struct ReadTextFile {
     /// The path of the file to read.
     pub path: String,
     /// Optional: Include line numbers in output (default: false).
-    /// When enabled, each line is prefixed with its line number (1-based).
-    /// Useful for AI agents that need to target specific lines for code patches.
+    /// When enabled, each line is prefixed with a right-aligned, 1-based line number,
+    /// followed by a space, a vertical bar (`|`), and another space, in the format: ` 123 | `.
     #[serde(default)]
     pub with_line_numbers: Option<bool>,
 }

diff --git a/tests/test_fs_service.rs b/tests/test_fs_service.rs
index 519841f..9059480 100644
--- a/tests/test_fs_service.rs
+++ b/tests/test_fs_service.rs
@@ -306,6 +306,42 @@ async fn test_read_text_file_with_line_numbers_large_file() {
     assert!(content.contains("  1000 | line1000"));
 }

+#[tokio::test]
+async fn test_read_text_file_with_line_numbers_windows_line_endings() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    let file_path = create_temp_file(
+        temp_dir.join("dir1").as_path(),
+        "windows.txt",
+        "line1\r\nline2\r\nline3",
+    );
+    let content = service.read_text_file(&file_path, true).await.unwrap();
+    assert_eq!(content, "     1 | line1\n     2 | line2\n     3 | line3");
+}
+
+#[tokio::test]
+async fn test_read_text_file_with_line_numbers_single_newline_unix() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    // A file containing just "\n" is treated by lines() as a single empty line;
+    // to get two empty lines, the content must be "\n\n".
+    let file_path = create_temp_file(temp_dir.join("dir1").as_path(), "newline_unix.txt", "\n\n");
+    let content = service.read_text_file(&file_path, true).await.unwrap();
+    assert_eq!(content, "     1 | \n     2 | ");
+}
+
+#[tokio::test]
+async fn test_read_text_file_with_line_numbers_single_newline_windows() {
+    let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);
+    // A file containing just "\r\n" is treated by lines() as a single empty line;
+    // to get two empty lines, the content must be "\r\n\r\n".
+    let file_path = create_temp_file(
+        temp_dir.join("dir1").as_path(),
+        "newline_windows.txt",
+        "\r\n\r\n",
+    );
+    let content = service.read_text_file(&file_path, true).await.unwrap();
+    assert_eq!(content, "     1 | \n     2 | ");
+}
+
 #[tokio::test]
 async fn test_create_directory() {
     let (temp_dir, service, _allowed_dirs) = setup_service(vec!["dir1".to_string()]);

From 1ecacc4f0934db550cf40f68e5d8f5f073544f8f Mon Sep 17 00:00:00 2001
From: Abderraouf Belalia
Date: Mon, 3 Nov 2025 14:33:47 +0100
Subject: [PATCH 5/5] fix: correct fs_service.rs module structure after rebase

[agent commit]
---
 src/fs_service.rs | 1614 +--------------------------------------------
 1 file changed, 7 insertions(+), 1607 deletions(-)

diff --git a/src/fs_service.rs b/src/fs_service.rs
index eb4eee6..89ba6e1 100644
--- a/src/fs_service.rs
+++ b/src/fs_service.rs
@@ -1,1609 +1,9 @@
-pub mod file_info;
+mod archive;
+mod core;
+mod io;
+mod search;
 pub mod utils;

-use crate::{
-    error::{ServiceError, ServiceResult},
-    fs_service::utils::is_system_metadata_file,
-    tools::EditOperation,
-};
-use async_zip::tokio::{read::seek::ZipFileReader, write::ZipFileWriter};
-use base64::{engine::general_purpose, write::EncoderWriter};
-use file_info::FileInfo;
-use futures::{StreamExt, stream};
-use glob_match::glob_match;
-use grep::{ - matcher::{Match, Matcher}, - regex::RegexMatcherBuilder, - searcher::{BinaryDetection, Searcher, sinks::UTF8}, -}; -use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator}; -use rust_mcp_sdk::schema::RpcError; -use serde_json::{Value, json}; -use sha2::{Digest, Sha256}; -use similar::TextDiff; -use std::{ - collections::{HashMap, HashSet}, - env, - fs::{self}, - io::{SeekFrom, Write}, - path::{Path, PathBuf}, - sync::Arc, -}; -use tokio::{ - fs::{File, metadata}, - io::{AsyncBufReadExt, AsyncReadExt, AsyncSeekExt, AsyncWriteExt, BufReader}, - sync::RwLock, -}; -use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; -use utils::{ - contains_symlink, expand_home, format_bytes, normalize_line_endings, normalize_path, - write_zip_entry, -}; -use walkdir::WalkDir; -const SNIPPET_MAX_LENGTH: usize = 200; -const SNIPPET_BACKWARD_CHARS: usize = 30; -const MAX_CONCURRENT_FILE_READ: usize = 5; - -#[cfg(windows)] -pub const OS_LINE_ENDING: &str = "\r\n"; -#[cfg(not(windows))] -pub const OS_LINE_ENDING: &str = "\n"; - -type PathResultList = Vec>; - -pub struct FileSystemService { - allowed_path: RwLock>>, -} - -/// Represents a single match found in a file's content. -#[derive(Debug, Clone)] -pub struct ContentMatchResult { - /// The line number where the match occurred (1-based). - pub line_number: u64, - pub start_pos: usize, - /// The line of text containing the match. - /// If the line exceeds 255 characters (excluding the search term), only a truncated portion will be shown. - pub line_text: String, -} - -/// Represents all matches found in a specific file. -#[derive(Debug, Clone)] -pub struct FileSearchResult { - /// The path to the file where matches were found. - pub file_path: PathBuf, - /// All individual match results within the file. - pub matches: Vec, -} - -/// This addresses the issue with the DockerHub mcp-registry & mcp-gateway where tool discovery fails to resolve -/// references to 'example' or 'default' values when running the run->command from the server.yaml file -/// should be removed once mcp-gateway is more mature -/// reference: https://github.com/docker/mcp-registry/blob/7d815fac2f3b7a9717eebc3f3db215de3ce3c3c7/internal/mcp/client.go#L170-L173 -#[allow(clippy::ptr_arg)] -fn fix_dockerhub_mcp_registry_gateway(input: &String) -> &str { - if input.contains("{{rust-mcp-filesystem.allowed_directories|volume-target|into}}") { - "." 
- } else { - input - } -} - -impl FileSystemService { - pub fn try_new(allowed_directories: &[String]) -> ServiceResult { - let normalized_dirs: Vec = allowed_directories - .iter() - .map(fix_dockerhub_mcp_registry_gateway) - .map_while(|dir| { - let expand_result = expand_home(dir.into()); - if !expand_result.is_dir() { - panic!("{}", format!("Error: {dir} is not a directory")); - } - Some(expand_result) - }) - .collect(); - - Ok(Self { - allowed_path: RwLock::new(Arc::new(normalized_dirs)), - }) - } - - pub async fn allowed_directories(&self) -> Arc> { - let guard = self.allowed_path.read().await; - guard.clone() - } -} - -impl FileSystemService { - pub fn valid_roots(&self, roots: Vec<&str>) -> ServiceResult<(Vec, Option)> { - let paths: Vec> = roots - .iter() - .map(|p| self.parse_file_path(p)) - .collect::>(); - - // Partition into Ok and Err results - let (ok_paths, err_paths): (PathResultList, PathResultList) = - paths.into_iter().partition(|p| p.is_ok()); - - // using HashSet to remove duplicates - let (valid_roots, no_dir_roots): (HashSet, HashSet) = ok_paths - .into_iter() - .collect::, _>>()? - .into_iter() - .map(expand_home) - .partition(|path| path.is_dir()); - - let skipped_roots = if !err_paths.is_empty() || !no_dir_roots.is_empty() { - Some(format!( - "Warning: skipped {} invalid roots.", - err_paths.len() + no_dir_roots.len() - )) - } else { - None - }; - - let valid_roots = valid_roots.into_iter().collect(); - - Ok((valid_roots, skipped_roots)) - } - - pub async fn update_allowed_paths(&self, valid_roots: Vec) { - let mut guard = self.allowed_path.write().await; - *guard = Arc::new(valid_roots) - } - - /// Converts a string to a `PathBuf`, supporting both raw paths and `file://` URIs. - fn parse_file_path(&self, input: &str) -> ServiceResult { - Ok(PathBuf::from( - input.strip_prefix("file://").unwrap_or(input).trim(), - )) - } - - pub fn validate_path( - &self, - requested_path: &Path, - allowed_directories: Arc>, - ) -> ServiceResult { - if allowed_directories.is_empty() { - return Err(ServiceError::FromString( - "Allowed directories list is empty. Client did not provide any valid root directories.".to_string() - )); - } - - // Expand ~ to home directory - let expanded_path = expand_home(requested_path.to_path_buf()); - - // Resolve the absolute path - let absolute_path = if expanded_path.as_path().is_absolute() { - expanded_path.clone() - } else { - env::current_dir().unwrap().join(&expanded_path) - }; - - // Normalize the path - let normalized_requested = normalize_path(&absolute_path); - - // Check if path is within allowed directories - if !allowed_directories.iter().any(|dir| { - // Must account for both scenarios — the requested path may not exist yet, making canonicalization impossible. - normalized_requested.starts_with(dir) - || normalized_requested.starts_with(normalize_path(dir)) - }) { - let symlink_target = if contains_symlink(&absolute_path)? 
{ - "a symlink target path" - } else { - "path" - }; - return Err(ServiceError::FromString(format!( - "Access denied - {} is outside allowed directories: {} not in {}", - symlink_target, - absolute_path.display(), - allowed_directories - .iter() - .map(|p| p.display().to_string()) - .collect::>() - .join(",\n"), - ))); - } - - Ok(absolute_path) - } - - // Get file stats - pub async fn get_file_stats(&self, file_path: &Path) -> ServiceResult { - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - - let metadata = fs::metadata(valid_path)?; - - let size = metadata.len(); - let created = metadata.created().ok(); - let modified = metadata.modified().ok(); - let accessed = metadata.accessed().ok(); - let is_directory = metadata.is_dir(); - let is_file = metadata.is_file(); - - Ok(FileInfo { - size, - created, - modified, - accessed, - is_directory, - is_file, - metadata, - }) - } - - fn detect_line_ending(&self, text: &str) -> &str { - if text.contains("\r\n") { - "\r\n" - } else if text.contains('\r') { - "\r" - } else { - "\n" - } - } - - pub async fn zip_directory( - &self, - input_dir: String, - pattern: String, - target_zip_file: String, - ) -> ServiceResult { - let allowed_directories = self.allowed_directories().await; - let valid_dir_path = - self.validate_path(Path::new(&input_dir), allowed_directories.clone())?; - - let input_dir_str = &valid_dir_path - .as_os_str() - .to_str() - .ok_or(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Invalid UTF-8 in file name", - ))?; - - let target_path = - self.validate_path(Path::new(&target_zip_file), allowed_directories.clone())?; - - if target_path.exists() { - return Err(std::io::Error::new( - std::io::ErrorKind::AlreadyExists, - format!("'{target_zip_file}' already exists!"), - ) - .into()); - } - - let updated_pattern = if pattern.contains('*') { - pattern.to_lowercase() - } else { - format!("*{}*", &pattern.to_lowercase()) - }; - - let glob_pattern = &updated_pattern; - - let entries: Vec<_> = WalkDir::new(&valid_dir_path) - .follow_links(true) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| { - let full_path = entry.path(); - - self.validate_path(full_path, allowed_directories.clone()) - .ok() - .and_then(|path| { - if path != valid_dir_path - && glob_match(glob_pattern, path.display().to_string().as_ref()) - { - Some(path) - } else { - None - } - }) - }) - .collect(); - - let zip_file = File::create(&target_path).await?; - let mut zip_writer = ZipFileWriter::new(zip_file.compat()); - - for entry_path_buf in &entries { - if entry_path_buf.is_dir() { - continue; - } - let entry_path = entry_path_buf.as_path(); - let entry_str = entry_path.as_os_str().to_str().ok_or(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Invalid UTF-8 in file name", - ))?; - - if !entry_str.starts_with(input_dir_str) { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Entry file path does not start with base input directory path.", - ) - .into()); - } - - let entry_str = &entry_str[input_dir_str.len() + 1..]; - write_zip_entry(entry_str, entry_path, &mut zip_writer).await?; - } - - let z_file = zip_writer.close().await?; - let zip_file_size = if let Ok(meta_data) = z_file.into_inner().metadata().await { - format_bytes(meta_data.len()) - } else { - "unknown".to_string() - }; - let result_message = format!( - "Successfully compressed '{}' directory into '{}' ({}).", - input_dir, - target_path.display(), - zip_file_size 
- ); - Ok(result_message) - } - - pub async fn zip_files( - &self, - input_files: Vec, - target_zip_file: String, - ) -> ServiceResult { - let file_count = input_files.len(); - - if file_count == 0 { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "No file(s) to zip. The input files array is empty.", - ) - .into()); - } - let allowed_directories = self.allowed_directories().await; - let target_path = - self.validate_path(Path::new(&target_zip_file), allowed_directories.clone())?; - - if target_path.exists() { - return Err(std::io::Error::new( - std::io::ErrorKind::AlreadyExists, - format!("'{target_zip_file}' already exists!"), - ) - .into()); - } - - let source_paths = input_files - .iter() - .map(|p| self.validate_path(Path::new(p), allowed_directories.clone())) - .collect::, _>>()?; - - let zip_file = File::create(&target_path).await?; - let mut zip_writer = ZipFileWriter::new(zip_file.compat()); - for path in source_paths { - let filename = path.file_name().ok_or(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Invalid path!", - ))?; - - let filename = filename.to_str().ok_or(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "Invalid UTF-8 in file name", - ))?; - - write_zip_entry(filename, &path, &mut zip_writer).await?; - } - let z_file = zip_writer.close().await?; - - let zip_file_size = if let Ok(meta_data) = z_file.into_inner().metadata().await { - format_bytes(meta_data.len()) - } else { - "unknown".to_string() - }; - - let result_message = format!( - "Successfully compressed {} {} into '{}' ({}).", - file_count, - if file_count == 1 { "file" } else { "files" }, - target_path.display(), - zip_file_size - ); - Ok(result_message) - } - - pub async fn unzip_file(&self, zip_file: &str, target_dir: &str) -> ServiceResult { - let allowed_directories = self.allowed_directories().await; - - let zip_file = self.validate_path(Path::new(&zip_file), allowed_directories.clone())?; - let target_dir_path = self.validate_path(Path::new(target_dir), allowed_directories)?; - if !zip_file.exists() { - return Err(std::io::Error::new( - std::io::ErrorKind::NotFound, - "Zip file does not exists.", - ) - .into()); - } - - if target_dir_path.exists() { - return Err(std::io::Error::new( - std::io::ErrorKind::AlreadyExists, - format!("'{target_dir}' directory already exists!"), - ) - .into()); - } - - let file = BufReader::new(File::open(zip_file).await?); - let mut zip = ZipFileReader::with_tokio(file).await?; - - let file_count = zip.file().entries().len(); - - for index in 0..file_count { - let entry = zip.file().entries().get(index).unwrap(); - let entry_path = target_dir_path.join(entry.filename().as_str()?); - // Ensure the parent directory exists - if let Some(parent) = entry_path.parent() { - tokio::fs::create_dir_all(parent).await?; - } - - // Extract the file - let reader = zip.reader_without_entry(index).await?; - let mut compat_reader = reader.compat(); - let mut output_file = File::create(&entry_path).await?; - - tokio::io::copy(&mut compat_reader, &mut output_file).await?; - output_file.flush().await?; - } - - let result_message = format!( - "Successfully extracted {} {} into '{}'.", - file_count, - if file_count == 1 { "file" } else { "files" }, - target_dir_path.display() - ); - - Ok(result_message) - } - - pub fn mime_from_path(&self, path: &Path) -> ServiceResult { - let is_svg = path - .extension() - .is_some_and(|e| e.to_str().is_some_and(|s| s == "svg")); - // consider it is a svg file as we cannot detect svg from bytes pattern - if is_svg { 
- return Ok(infer::Type::new( - infer::MatcherType::Image, - "image/svg+xml", - "svg", - |_: &[u8]| true, - )); - - // infer::Type::new(infer::MatcherType::Image, "", "svg",); - } - let kind = infer::get_from_path(path)?.ok_or(ServiceError::FromString( - "File tyle is unknown!".to_string(), - ))?; - Ok(kind) - } - - pub fn filesize_in_range( - &self, - file_size: u64, - min_bytes: Option, - max_bytes: Option, - ) -> bool { - if min_bytes.is_none() && max_bytes.is_none() { - return true; - } - match (min_bytes, max_bytes) { - (_, Some(max)) if file_size > max => false, - (Some(min), _) if file_size < min => false, - _ => true, - } - } - - pub async fn validate_file_size>( - &self, - path: P, - min_bytes: Option, - max_bytes: Option, - ) -> ServiceResult<()> { - if min_bytes.is_none() && max_bytes.is_none() { - return Ok(()); - } - - let file_size = metadata(&path).await?.len() as usize; - - match (min_bytes, max_bytes) { - (_, Some(max)) if file_size > max => Err(ServiceError::FileTooLarge(max)), - (Some(min), _) if file_size < min => Err(ServiceError::FileTooSmall(min)), - _ => Ok(()), - } - } - - pub async fn read_media_files( - &self, - paths: Vec, - max_bytes: Option, - ) -> ServiceResult> { - let results = stream::iter(paths) - .map(|path| async { - self.read_media_file(Path::new(&path), max_bytes) - .await - .map_err(|e| (path, e)) - }) - .buffer_unordered(MAX_CONCURRENT_FILE_READ) // Process up to MAX_CONCURRENT_FILE_READ files concurrently - .filter_map(|result| async move { result.ok() }) - .collect::>() - .await; - Ok(results) - } - - pub async fn read_media_file( - &self, - file_path: &Path, - max_bytes: Option, - ) -> ServiceResult<(infer::Type, String)> { - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - self.validate_file_size(&valid_path, None, max_bytes) - .await?; - let kind = self.mime_from_path(&valid_path)?; - let content = self.read_file_as_base64(&valid_path).await?; - Ok((kind, content)) - } - - // reads file as base64 efficiently in a streaming manner - async fn read_file_as_base64(&self, file_path: &Path) -> ServiceResult { - let file = File::open(file_path).await?; - let mut reader = BufReader::new(file); - - let mut output = Vec::new(); - { - // Wrap output Vec in a Base64 encoder writer - let mut encoder = EncoderWriter::new(&mut output, &general_purpose::STANDARD); - - let mut buffer = [0u8; 8192]; - loop { - let n = reader.read(&mut buffer).await?; - if n == 0 { - break; - } - // Write raw bytes to the Base64 encoder - encoder.write_all(&buffer[..n])?; - } - // Make sure to flush any remaining bytes - encoder.flush()?; - } // drop encoder before consuming output - - // Convert the Base64 bytes to String (safe UTF-8) - let base64_string = - String::from_utf8(output).map_err(|err| ServiceError::FromString(format!("{err}")))?; - Ok(base64_string) - } - - pub async fn read_text_file( - &self, - file_path: &Path, - with_line_numbers: bool, - ) -> ServiceResult { - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - let content = tokio::fs::read_to_string(valid_path).await?; - - if with_line_numbers { - Ok(content - .lines() - .enumerate() - .map(|(i, line)| format!("{:>6} | {}", i + 1, line)) - .collect::>() - .join("\n")) - } else { - Ok(content) - } - } - - pub async fn create_directory(&self, file_path: &Path) -> ServiceResult<()> { - let allowed_directories = self.allowed_directories().await; - 
let valid_path = self.validate_path(file_path, allowed_directories)?; - tokio::fs::create_dir_all(valid_path).await?; - Ok(()) - } - - pub async fn move_file(&self, src_path: &Path, dest_path: &Path) -> ServiceResult<()> { - let allowed_directories = self.allowed_directories().await; - let valid_src_path = self.validate_path(src_path, allowed_directories.clone())?; - let valid_dest_path = self.validate_path(dest_path, allowed_directories)?; - tokio::fs::rename(valid_src_path, valid_dest_path).await?; - Ok(()) - } - - pub async fn list_directory(&self, dir_path: &Path) -> ServiceResult> { - let allowed_directories = self.allowed_directories().await; - - let valid_path = self.validate_path(dir_path, allowed_directories)?; - - let mut dir = tokio::fs::read_dir(valid_path).await?; - - let mut entries = Vec::new(); - - // Use a loop to collect the directory entries - while let Some(entry) = dir.next_entry().await? { - entries.push(entry); - } - - Ok(entries) - } - - pub async fn write_file(&self, file_path: &Path, content: &String) -> ServiceResult<()> { - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - tokio::fs::write(valid_path, content).await?; - Ok(()) - } - - /// Searches for files in the directory tree starting at `root_path` that match the given `pattern`, - /// excluding paths that match any of the `exclude_patterns`. - /// - /// # Arguments - /// * `root_path` - The root directory to start the search from. - /// * `pattern` - A glob pattern to match file names (case-insensitive). If no wildcards are provided, - /// the pattern is wrapped in '*' for partial matching. - /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive). - /// - /// # Returns - /// A `ServiceResult` containing a vector of`walkdir::DirEntry` objects for matching files, - /// or a `ServiceError` if an error occurs. - pub async fn search_files( - &self, - root_path: &Path, - pattern: String, - exclude_patterns: Vec, - min_bytes: Option, - max_bytes: Option, - ) -> ServiceResult> { - let result = self - .search_files_iter(root_path, pattern, exclude_patterns, min_bytes, max_bytes) - .await?; - Ok(result.collect::>()) - } - - /// Returns an iterator over files in the directory tree starting at `root_path` that match - /// the given `pattern`, excluding paths that match any of the `exclude_patterns`. - /// - /// # Arguments - /// * `root_path` - The root directory to start the search from. - /// * `pattern` - A glob pattern to match file names. If no wildcards are provided, the pattern is wrapped in `**/*{pattern}*` for partial matching. - /// * `exclude_patterns` - A list of glob patterns to exclude paths (case-sensitive). - /// - /// # Returns - /// A `ServiceResult` containing an iterator yielding `walkdir::DirEntry` objects for matching files, - /// or a `ServiceError` if an error occurs. 
- pub async fn search_files_iter<'a>( - &'a self, - // root_path: impl Into, - root_path: &'a Path, - pattern: String, - exclude_patterns: Vec, - min_bytes: Option, - max_bytes: Option, - ) -> ServiceResult + 'a> { - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(root_path, allowed_directories.clone())?; - - let updated_pattern = if pattern.contains('*') { - pattern.to_lowercase() - } else { - format!("**/*{}*", &pattern.to_lowercase()) - }; - let glob_pattern = updated_pattern; - - let result = WalkDir::new(valid_path) - .follow_links(true) - .into_iter() - .filter_entry(move |dir_entry| { - let full_path = dir_entry.path(); - - // Validate each path before processing - let validated_path = self - .validate_path(full_path, allowed_directories.clone()) - .ok(); - - if validated_path.is_none() { - // Skip invalid paths during search - return false; - } - - // Get the relative path from the root_path - let relative_path = full_path.strip_prefix(root_path).unwrap_or(full_path); - - let mut should_exclude = exclude_patterns.iter().any(|pattern| { - let glob_pattern = if pattern.contains('*') { - pattern.strip_prefix("/").unwrap_or(pattern).to_owned() - } else { - format!("*{pattern}*") - }; - - glob_match(&glob_pattern, relative_path.to_str().unwrap_or("")) - }); - - // enforce min/max bytes - if !should_exclude && (min_bytes.is_none() || max_bytes.is_none()) { - match dir_entry.metadata().ok() { - Some(metadata) => { - if !self.filesize_in_range(metadata.len(), min_bytes, max_bytes) { - should_exclude = true; - } - } - None => { - should_exclude = true; - } - } - } - - !should_exclude - }) - .filter_map(|v| v.ok()) - .filter(move |entry| { - if root_path == entry.path() { - return false; - } - - glob_match( - &glob_pattern, - &entry.file_name().to_str().unwrap_or("").to_lowercase(), - ) - }); - - Ok(result) - } - - /// Generates a JSON representation of a directory tree starting at the given path. - /// - /// This function recursively builds a JSON array object representing the directory structure, - /// where each entry includes a `name` (file or directory name), `type` ("file" or "directory"), - /// and for directories, a `children` array containing their contents. Files do not have a - /// `children` field. - /// - /// The function supports optional constraints to limit the tree size: - /// - `max_depth`: Limits the depth of directory traversal. - /// - `max_files`: Limits the total number of entries (files and directories). - /// - /// # IMPORTANT NOTE - /// - /// use max_depth or max_files could lead to partial or skewed representations of actual directory tree - pub fn directory_tree>( - &self, - root_path: P, - max_depth: Option, - max_files: Option, - current_count: &mut usize, - allowed_directories: Arc>, - ) -> ServiceResult<(Value, bool)> { - let valid_path = self.validate_path(root_path.as_ref(), allowed_directories.clone())?; - - let metadata = fs::metadata(&valid_path)?; - if !metadata.is_dir() { - return Err(ServiceError::FromString( - "Root path must be a directory".into(), - )); - } - - let mut children = Vec::new(); - let mut reached_max_depth = false; - - if max_depth != Some(0) { - for entry in WalkDir::new(valid_path) - .min_depth(1) - .max_depth(1) - .follow_links(true) - .into_iter() - .filter_map(|e| e.ok()) - { - let child_path = entry.path(); - let metadata = fs::metadata(child_path)?; - - let entry_name = child_path - .file_name() - .ok_or(ServiceError::FromString("Invalid path".to_string()))? 
- .to_string_lossy() - .into_owned(); - - // Increment the count for this entry - *current_count += 1; - - // Check if we've exceeded max_files (if set) - if let Some(max) = max_files { - if *current_count > max { - continue; // Skip this entry but continue processing others - } - } - - let mut json_entry = json!({ - "name": entry_name, - "type": if metadata.is_dir() { "directory" } else { "file" } - }); - - if metadata.is_dir() { - let next_depth = max_depth.map(|d| d - 1); - let (child_children, child_reached_max_depth) = self.directory_tree( - child_path, - next_depth, - max_files, - current_count, - allowed_directories.clone(), - )?; - json_entry - .as_object_mut() - .unwrap() - .insert("children".to_string(), child_children); - reached_max_depth |= child_reached_max_depth; - } - children.push(json_entry); - } - } else { - // If max_depth is 0, we skip processing this directory's children - reached_max_depth = true; - } - Ok((Value::Array(children), reached_max_depth)) - } - - pub fn create_unified_diff( - &self, - original_content: &str, - new_content: &str, - filepath: Option, - ) -> String { - // Ensure consistent line endings for diff - let normalized_original = normalize_line_endings(original_content); - let normalized_new = normalize_line_endings(new_content); - - // // Generate the diff using TextDiff - let diff = TextDiff::from_lines(&normalized_original, &normalized_new); - - let file_name = filepath.unwrap_or("file".to_string()); - // Format the diff as a unified diff - let patch = diff - .unified_diff() - .header( - format!("{file_name}\toriginal").as_str(), - format!("{file_name}\tmodified").as_str(), - ) - .context_radius(4) - .to_string(); - - format!("Index: {}\n{}\n{}", file_name, "=".repeat(68), patch) - } - - pub async fn apply_file_edits( - &self, - file_path: &Path, - edits: Vec, - dry_run: Option, - save_to: Option<&Path>, - ) -> ServiceResult { - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - - // Read file content and normalize line endings - let content_str = tokio::fs::read_to_string(&valid_path).await?; - let original_line_ending = self.detect_line_ending(&content_str); - let content_str = normalize_line_endings(&content_str); - - // Apply edits sequentially - let mut modified_content = content_str.clone(); - - for edit in edits { - let normalized_old = normalize_line_endings(&edit.old_text); - let normalized_new = normalize_line_endings(&edit.new_text); - // If exact match exists, use it - if modified_content.contains(&normalized_old) { - modified_content = modified_content.replacen(&normalized_old, &normalized_new, 1); - continue; - } - - // Otherwise, try line-by-line matching with flexibility for whitespace - let old_lines: Vec = normalized_old - .trim_end() - .split('\n') - .map(|s| s.to_string()) - .collect(); - - let content_lines: Vec = modified_content - .trim_end() - .split('\n') - .map(|s| s.to_string()) - .collect(); - - let mut match_found = false; - - // skip when the match is impossible: - if old_lines.len() > content_lines.len() { - let error_message = format!( - "Cannot apply edit: the original text spans more lines ({}) than the file content ({}).", - old_lines.len(), - content_lines.len() - ); - - return Err(RpcError::internal_error() - .with_message(error_message) - .into()); - } - - let max_start = content_lines.len().saturating_sub(old_lines.len()); - for i in 0..=max_start { - let potential_match = &content_lines[i..i + old_lines.len()]; - - // Compare 
lines with normalized whitespace - let is_match = old_lines.iter().enumerate().all(|(j, old_line)| { - let content_line = &potential_match[j]; - old_line.trim() == content_line.trim() - }); - - if is_match { - // Preserve original indentation of first line - let original_indent = content_lines[i] - .chars() - .take_while(|&c| c.is_whitespace()) - .collect::(); - - let new_lines: Vec = normalized_new - .split('\n') - .enumerate() - .map(|(j, line)| { - // Keep indentation of the first line - if j == 0 { - return format!("{}{}", original_indent, line.trim_start()); - } - - // For subsequent lines, preserve relative indentation and original whitespace type - let old_indent = old_lines - .get(j) - .map(|line| { - line.chars() - .take_while(|&c| c.is_whitespace()) - .collect::() - }) - .unwrap_or_default(); - - let new_indent = line - .chars() - .take_while(|&c| c.is_whitespace()) - .collect::(); - - // Use the same whitespace character as original_indent (tabs or spaces) - let indent_char = if original_indent.contains('\t') { - "\t" - } else { - " " - }; - let relative_indent = if new_indent.len() >= old_indent.len() { - new_indent.len() - old_indent.len() - } else { - 0 // Don't reduce indentation below original - }; - format!( - "{}{}{}", - &original_indent, - &indent_char.repeat(relative_indent), - line.trim_start() - ) - }) - .collect(); - - let mut content_lines = content_lines.clone(); - content_lines.splice(i..i + old_lines.len(), new_lines); - modified_content = content_lines.join("\n"); - match_found = true; - break; - } - } - if !match_found { - return Err(RpcError::internal_error() - .with_message(format!( - "Could not find exact match for edit:\n{}", - edit.old_text - )) - .into()); - } - } - - let diff = self.create_unified_diff( - &content_str, - &modified_content, - Some(valid_path.display().to_string()), - ); - - // Format diff with appropriate number of backticks - let mut num_backticks = 3; - while diff.contains(&"`".repeat(num_backticks)) { - num_backticks += 1; - } - let formatted_diff = format!( - "{}diff\n{}{}\n\n", - "`".repeat(num_backticks), - diff, - "`".repeat(num_backticks) - ); - - let is_dry_run = dry_run.unwrap_or(false); - - if !is_dry_run { - let target = save_to.unwrap_or(valid_path.as_path()); - let modified_content = modified_content.replace("\n", original_line_ending); - tokio::fs::write(target, modified_content).await?; - } - - Ok(formatted_diff) - } - - pub fn escape_regex(&self, text: &str) -> String { - // Covers special characters in regex engines (RE2, PCRE, JS, Python) - const SPECIAL_CHARS: &[char] = &[ - '.', '^', '$', '*', '+', '?', '(', ')', '[', ']', '{', '}', '\\', '|', '/', - ]; - - let mut escaped = String::with_capacity(text.len()); - - for ch in text.chars() { - if SPECIAL_CHARS.contains(&ch) { - escaped.push('\\'); - } - escaped.push(ch); - } - - escaped - } - - // Searches the content of a file for occurrences of the given query string. - /// - /// This method searches the file specified by `file_path` for lines matching the `query`. - /// The search can be performed as a regular expression or as a literal string, - /// depending on the `is_regex` flag. - /// - /// If matched line is larger than 255 characters, a snippet will be extracted around the matched text. 
- /// - pub fn content_search( - &self, - query: &str, - file_path: impl AsRef, - is_regex: Option, - ) -> ServiceResult> { - let query = if is_regex.unwrap_or_default() { - query.to_string() - } else { - self.escape_regex(query) - }; - - let matcher = RegexMatcherBuilder::new() - .case_insensitive(true) - .build(query.as_str())?; - - let mut searcher = Searcher::new(); - let mut result = FileSearchResult { - file_path: file_path.as_ref().to_path_buf(), - matches: vec![], - }; - - searcher.set_binary_detection(BinaryDetection::quit(b'\x00')); - - searcher.search_path( - &matcher, - file_path, - UTF8(|line_number, line| { - let actual_match = matcher.find(line.as_bytes())?.unwrap(); - - result.matches.push(ContentMatchResult { - line_number, - start_pos: actual_match.start(), - line_text: self.extract_snippet(line, actual_match, None, None), - }); - Ok(true) - }), - )?; - - if result.matches.is_empty() { - return Ok(None); - } - - Ok(Some(result)) - } - - /// Extracts a snippet from a given line of text around a match. - /// - /// It extracts a substring starting a fixed number of characters (`SNIPPET_BACKWARD_CHARS`) - /// before the start position of the `match`, and extends up to `max_length` characters - /// If the snippet does not include the beginning or end of the original line, ellipses (`"..."`) are added - /// to indicate the truncation. - pub fn extract_snippet( - &self, - line: &str, - match_result: Match, - max_length: Option, - backward_chars: Option, - ) -> String { - let max_length = max_length.unwrap_or(SNIPPET_MAX_LENGTH); - let backward_chars = backward_chars.unwrap_or(SNIPPET_BACKWARD_CHARS); - - // Calculate the number of leading whitespace bytes to adjust for trimmed input - let start_pos = line.len() - line.trim_start().len(); - // Trim leading and trailing whitespace from the input line - let line = line.trim(); - - // Calculate the desired start byte index by adjusting match start for trimming and backward chars - // match_result.start() is the byte index in the original string - // Subtract start_pos to account for trimmed whitespace and backward_chars to include context before the match - let desired_start = (match_result.start() - start_pos).saturating_sub(backward_chars); - - // Find the nearest valid UTF-8 character boundary at or after desired_start - // Prevents "byte index is not a char boundary" panic by ensuring the slice starts at a valid character (issue #37) - let snippet_start = line - .char_indices() - .map(|(i, _)| i) - .find(|&i| i >= desired_start) - .unwrap_or(desired_start.min(line.len())); - // Initialize a counter for tracking characters to respect max_length - let mut char_count = 0; - - // Calculate the desired end byte index by counting max_length characters from snippet_start - // Take max_length + 1 to find the boundary after the last desired character - let desired_end = line[snippet_start..] 
- .char_indices() - .take(max_length + 1) - .find(|&(_, _)| { - char_count += 1; - char_count > max_length - }) - .map(|(i, _)| snippet_start + i) - .unwrap_or(line.len()); - - // Ensure snippet_end is a valid UTF-8 character boundary at or after desired_end - // This prevents slicing issues with multi-byte characters - let snippet_end = line - .char_indices() - .map(|(i, _)| i) - .find(|&i| i >= desired_end) - .unwrap_or(line.len()); - - // Cap snippet_end to avoid exceeding the string length - let snippet_end = snippet_end.min(line.len()); - - // Extract the snippet from the trimmed line using the calculated byte indices - let snippet = &line[snippet_start..snippet_end]; - - let mut result = String::new(); - // Add leading ellipsis if the snippet doesn't start at the beginning of the trimmed line - if snippet_start > 0 { - result.push_str("..."); - } - - result.push_str(snippet); - - // Add trailing ellipsis if the snippet doesn't reach the end of the trimmed line - if snippet_end < line.len() { - result.push_str("..."); - } - result - } - - #[allow(clippy::too_many_arguments)] - pub async fn search_files_content( - &self, - root_path: impl AsRef, - pattern: &str, - query: &str, - is_regex: bool, - exclude_patterns: Option>, - min_bytes: Option, - max_bytes: Option, - ) -> ServiceResult> { - let files_iter = self - .search_files_iter( - root_path.as_ref(), - pattern.to_string(), - exclude_patterns.to_owned().unwrap_or_default(), - min_bytes, - max_bytes, - ) - .await?; - - let results: Vec = files_iter - .filter_map(|entry| { - self.content_search(query, entry.path(), Some(is_regex)) - .ok() - .and_then(|v| v) - }) - .collect(); - Ok(results) - } - - /// Reads the first n lines from a text file, preserving line endings. - /// Args: - /// file_path: Path to the file - /// n: Number of lines to read - /// Returns a String containing the first n lines with original line endings or an error if the path is invalid or file cannot be read. - pub async fn head_file(&self, file_path: &Path, n: usize) -> ServiceResult { - // Validate file path against allowed directories - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - - // Open file asynchronously and create a BufReader - let file = File::open(&valid_path).await?; - let mut reader = BufReader::new(file); - let mut result = String::with_capacity(n * 100); // Estimate capacity (avg 100 bytes/line) - let mut count = 0; - - // Read lines asynchronously, preserving line endings - let mut line = Vec::new(); - while count < n { - line.clear(); - let bytes_read = reader.read_until(b'\n', &mut line).await?; - if bytes_read == 0 { - break; // Reached EOF - } - result.push_str(&String::from_utf8_lossy(&line)); - count += 1; - } - - Ok(result) - } - - /// Reads the last n lines from a text file, preserving line endings. - /// Args: - /// file_path: Path to the file - /// n: Number of lines to read - /// Returns a String containing the last n lines with original line endings or an error if the path is invalid or file cannot be read. 
- pub async fn tail_file(&self, file_path: &Path, n: usize) -> ServiceResult { - // Validate file path against allowed directories - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(file_path, allowed_directories)?; - - // Open file asynchronously - let file = File::open(&valid_path).await?; - let file_size = file.metadata().await?.len(); - - // If file is empty or n is 0, return empty string - if file_size == 0 || n == 0 { - return Ok(String::new()); - } - - // Create a BufReader - let mut reader = BufReader::new(file); - let mut line_count = 0; - let mut pos = file_size; - let chunk_size = 8192; // 8KB chunks - let mut buffer = vec![0u8; chunk_size]; - let mut newline_positions = Vec::new(); - - // Read backwards to collect all newline positions - while pos > 0 { - let read_size = chunk_size.min(pos as usize); - pos -= read_size as u64; - reader.seek(SeekFrom::Start(pos)).await?; - let read_bytes = reader.read_exact(&mut buffer[..read_size]).await?; - - // Process chunk in reverse to find newlines - for (i, byte) in buffer[..read_bytes].iter().enumerate().rev() { - if *byte == b'\n' { - newline_positions.push(pos + i as u64); - line_count += 1; - } - } - } - - // Check if file ends with a non-newline character (partial last line) - if file_size > 0 { - let mut temp_reader = BufReader::new(File::open(&valid_path).await?); - temp_reader.seek(SeekFrom::End(-1)).await?; - let mut last_byte = [0u8; 1]; - temp_reader.read_exact(&mut last_byte).await?; - if last_byte[0] != b'\n' { - line_count += 1; - } - } - - // Determine start position for reading the last n lines - let start_pos = if line_count <= n { - 0 // Read from start if fewer than n lines - } else { - *newline_positions.get(line_count - n).unwrap_or(&0) + 1 - }; - - // Read forward from start_pos - reader.seek(SeekFrom::Start(start_pos)).await?; - let mut result = String::with_capacity(n * 100); // Estimate capacity - let mut line = Vec::new(); - let mut lines_read = 0; - - while lines_read < n { - line.clear(); - let bytes_read = reader.read_until(b'\n', &mut line).await?; - if bytes_read == 0 { - // Handle partial last line at EOF - if !line.is_empty() { - result.push_str(&String::from_utf8_lossy(&line)); - } - break; - } - result.push_str(&String::from_utf8_lossy(&line)); - lines_read += 1; - } - - Ok(result) - } - - /// Reads lines from a text file starting at the specified offset (0-based), preserving line endings. - /// Args: - /// path: Path to the file - /// offset: Number of lines to skip (0-based) - /// limit: Optional maximum number of lines to read - /// Returns a String containing the selected lines with original line endings or an error if the path is invalid or file cannot be read. - pub async fn read_file_lines( - &self, - path: &Path, - offset: usize, - limit: Option, - ) -> ServiceResult { - // Validate file path against allowed directories - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(path, allowed_directories)?; - - // Open file and get metadata before moving into BufReader - let file = File::open(&valid_path).await?; - let file_size = file.metadata().await?.len(); - let mut reader = BufReader::new(file); - - // If file is empty or limit is 0, return empty string - if file_size == 0 || limit == Some(0) { - return Ok(String::new()); - } - - // Skip offset lines (0-based indexing) - let mut buffer = Vec::new(); - for _ in 0..offset { - buffer.clear(); - if reader.read_until(b'\n', &mut buffer).await? 
== 0 { - return Ok(String::new()); // EOF before offset - } - } - - // Read lines up to limit (or all remaining if limit is None) - let mut result = String::with_capacity(limit.unwrap_or(100) * 100); // Estimate capacity - match limit { - Some(max_lines) => { - for _ in 0..max_lines { - buffer.clear(); - let bytes_read = reader.read_until(b'\n', &mut buffer).await?; - if bytes_read == 0 { - break; // Reached EOF - } - result.push_str(&String::from_utf8_lossy(&buffer)); - } - } - None => { - loop { - buffer.clear(); - let bytes_read = reader.read_until(b'\n', &mut buffer).await?; - if bytes_read == 0 { - break; // Reached EOF - } - result.push_str(&String::from_utf8_lossy(&buffer)); - } - } - } - - Ok(result) - } - - /// Calculates the total size (in bytes) of all files within a directory tree. - /// - /// This function recursively searches the specified `root_path` for files, - /// filters out directories and non-file entries, and sums the sizes of all found files. - /// The size calculation is parallelized using Rayon for improved performance on large directories. - /// - /// # Arguments - /// * `root_path` - The root directory path to start the size calculation. - /// - /// # Returns - /// Returns a `ServiceResult` containing the total size in bytes of all files under the `root_path`. - /// - /// # Notes - /// - Only files are included in the size calculation; directories and other non-file entries are ignored. - /// - The search pattern is `"**/*"` (all files) and no exclusions are applied. - /// - Parallel iteration is used to speed up the metadata fetching and summation. - pub async fn calculate_directory_size(&self, root_path: &Path) -> ServiceResult { - let entries = self - .search_files_iter(root_path, "**/*".to_string(), vec![], None, None) - .await? - .filter(|e| e.file_type().is_file()); // Only process files - - // Use rayon to parallelize size summation - let total_size: u64 = entries - .par_bridge() // Convert to parallel iterator - .filter_map(|entry| entry.metadata().ok().map(|meta| meta.len())) - .sum(); - - Ok(total_size) - } - - /// Recursively finds all empty directories within the given root path. - /// - /// A directory is considered empty if it contains no files in itself or any of its subdirectories - /// except OS metadata files: `.DS_Store` (macOS) and `Thumbs.db` (Windows) - /// Empty subdirectories are allowed. You can optionally provide a list of glob-style patterns in - /// `exclude_patterns` to ignore certain paths during the search (e.g., to skip system folders or hidden directories). - /// - /// # Arguments - /// - `root_path`: The starting directory to search. - /// - `exclude_patterns`: Optional list of glob patterns to exclude from the search. - /// Directories matching these patterns will be ignored. - /// - /// # Errors - /// Returns an error if the root path is invalid or inaccessible. - /// - /// # Returns - /// A list of paths to empty directories, as strings, including parent directories that contain only empty subdirectories. - /// Recursively finds all empty directories within the given root path. - /// - /// A directory is considered empty if it contains no files in itself or any of its subdirectories. - /// Empty subdirectories are allowed. You can optionally provide a list of glob-style patterns in - /// `exclude_patterns` to ignore certain paths during the search (e.g., to skip system folders or hidden directories). - /// - /// # Arguments - /// - `root_path`: The starting directory to search. 
- /// - `exclude_patterns`: Optional list of glob patterns to exclude from the search. - /// Directories matching these patterns will be ignored. - /// - /// # Errors - /// Returns an error if the root path is invalid or inaccessible. - /// - /// # Returns - /// A list of paths to all empty directories, as strings, including parent directories that contain only empty subdirectories. - pub async fn find_empty_directories( - &self, - root_path: &Path, - exclude_patterns: Option>, - ) -> ServiceResult> { - let walker = self - .search_files_iter( - root_path, - "**/*".to_string(), - exclude_patterns.unwrap_or_default(), - None, - None, - ) - .await? - .filter(|e| e.file_type().is_dir()); // Only directories - - let mut empty_dirs = Vec::new(); - - // Check each directory for emptiness - for entry in walker { - let is_empty = WalkDir::new(entry.path()) - .into_iter() - .filter_map(|e| e.ok()) - .all(|e| !e.file_type().is_file() || is_system_metadata_file(e.file_name())); // Directory is empty if no files are found in it or subdirs, ".DS_Store" will be ignores on Mac - - if is_empty { - if let Some(path_str) = entry.path().to_str() { - empty_dirs.push(path_str.to_string()); - } - } - } - - Ok(empty_dirs) - } - - /// Finds groups of duplicate files within the given root path. - /// Returns a vector of vectors, where each inner vector contains paths to files with identical content. - /// Files are considered duplicates if they have the same size and SHA-256 hash. - pub async fn find_duplicate_files( - &self, - root_path: &Path, - pattern: Option, - exclude_patterns: Option>, - min_bytes: Option, - max_bytes: Option, - ) -> ServiceResult>> { - // Validate root path against allowed directories - let allowed_directories = self.allowed_directories().await; - let valid_path = self.validate_path(root_path, allowed_directories)?; - - // Get Tokio runtime handle - let rt = tokio::runtime::Handle::current(); - - // Step 1: Collect files and group by size - let mut size_map: HashMap> = HashMap::new(); - let entries = self - .search_files_iter( - &valid_path, - pattern.unwrap_or("**/*".to_string()), - exclude_patterns.unwrap_or_default(), - min_bytes, - max_bytes, - ) - .await? 
- .filter(|e| e.file_type().is_file()); // Only files - - for entry in entries { - if let Ok(metadata) = entry.metadata() { - if let Some(path_str) = entry.path().to_str() { - size_map - .entry(metadata.len()) - .or_default() - .push(path_str.to_string()); - } - } - } - - // Filter out sizes with only one file (no duplicates possible) - let size_groups: Vec> = size_map - .into_iter() - .collect::>() // Collect into Vec to enable parallel iteration - .into_par_iter() - .filter(|(_, paths)| paths.len() > 1) - .map(|(_, paths)| paths) - .collect(); - - // Step 2: Group by quick hash (first 4KB) - let mut quick_hash_map: HashMap, Vec> = HashMap::new(); - for paths in size_groups.into_iter() { - let quick_hashes: Vec<(String, Vec)> = paths - .into_par_iter() - .filter_map(|path| { - let rt = rt.clone(); // Clone the runtime handle for this task - rt.block_on(async { - let file = File::open(&path).await.ok()?; - let mut reader = tokio::io::BufReader::new(file); - let mut buffer = vec![0u8; 4096]; // Read first 4KB - let bytes_read = reader.read(&mut buffer).await.ok()?; - let mut hasher = Sha256::new(); - hasher.update(&buffer[..bytes_read]); - Some((path, hasher.finalize().to_vec())) - }) - }) - .collect(); - - for (path, hash) in quick_hashes { - quick_hash_map.entry(hash).or_default().push(path); - } - } - - // Step 3: Group by full hash for groups with multiple files - let mut full_hash_map: HashMap, Vec> = HashMap::new(); - let filtered_quick_hashes: Vec<(Vec, Vec)> = quick_hash_map - .into_iter() - .collect::>() - .into_par_iter() - .filter(|(_, paths)| paths.len() > 1) - .collect(); - - for (_quick_hash, paths) in filtered_quick_hashes { - let full_hashes: Vec<(String, Vec)> = paths - .into_par_iter() - .filter_map(|path| { - let rt = rt.clone(); // Clone the runtime handle for this task - rt.block_on(async { - let file = File::open(&path).await.ok()?; - let mut reader = tokio::io::BufReader::new(file); - let mut hasher = Sha256::new(); - let mut buffer = vec![0u8; 8192]; // 8KB chunks - loop { - let bytes_read = reader.read(&mut buffer).await.ok()?; - if bytes_read == 0 { - break; - } - hasher.update(&buffer[..bytes_read]); - } - Some((path, hasher.finalize().to_vec())) - }) - }) - .collect(); - - for (path, hash) in full_hashes { - full_hash_map.entry(hash).or_default().push(path); - } - } - - // Collect groups of duplicates (only groups with more than one file) - let duplicates: Vec> = full_hash_map - .into_values() - .filter(|group| group.len() > 1) - .collect(); - - Ok(duplicates) - } -} +pub use core::FileSystemService; +pub use io::FileInfo; +pub use search::FileSearchResult;
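After PATCH 5/5 the public surface is unchanged; only the module layout moves. As a closing illustration (a sketch under stated assumptions, not part of the patch series), here is how a caller exercises the final API. The import path assumes the crate exposes a library target named `rust_mcp_filesystem` (in-tree code would use `crate::fs_service`); the allowed directory "." and the file "notes.txt" are placeholders; `try_new` and the `read_text_file(&Path, bool)` signature are taken from the diffs above.

use std::path::Path;

use rust_mcp_filesystem::fs_service::FileSystemService;

#[tokio::main]
async fn main() {
    // Allow the current directory; `try_new` is the constructor shown in the
    // relocated service code above.
    let service = FileSystemService::try_new(&[".".to_string()]).expect("invalid allowed dir");

    // The second argument selects plain vs numbered output.
    let plain = service
        .read_text_file(Path::new("notes.txt"), false)
        .await
        .expect("read failed");
    let numbered = service
        .read_text_file(Path::new("notes.txt"), true)
        .await
        .expect("read failed");

    println!("{plain}\n---\n{numbered}");
}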