AirLibrary/Indexing/Scan/
ScanFile.rs1use std::{
72 path::PathBuf,
73 time::{Duration, Instant},
74};
75
76use crate::dev_log;
78use crate::{
79 AirError,
80 Configuration::IndexingConfig,
81 Indexing::{
82 Process::{
83 ExtractSymbols::ExtractSymbols,
84 ProcessContent::{DetectEncoding, DetectLanguage, DetectMimeType},
85 },
86 State::CreateState::{FileMetadata, SymbolInfo},
87 },
88 Result,
89};
90
91pub async fn IndexFileInternal(
103 file_path:&PathBuf,
104 config:&IndexingConfig,
105 _patterns:&[String],
106) -> Result<(FileMetadata, Vec<SymbolInfo>)> {
107 let start_time = Instant::now();
108
109 let metadata = std::fs::metadata(file_path)
111 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
112
113 let modified = metadata
115 .modified()
116 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
117
118 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
119
120 let file_size = metadata.len();
122 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
123 return Err(AirError::FileSystem(format!(
124 "File size {} exceeds limit {} MB",
125 file_size, config.MaxFileSizeMb
126 )));
127 }
128
129 let content = tokio::time::timeout(Duration::from_secs(30), tokio::fs::read(file_path))
131 .await
132 .map_err(|_| AirError::FileSystem(format!("Timeout reading file: {} (30s limit)", file_path.display())))?
133 .map_err(|e| AirError::FileSystem(format!("Failed to read file: {}", e)))?;
134
135 let is_symlink = std::fs::symlink_metadata(file_path)
137 .map(|m| m.file_type().is_symlink())
138 .unwrap_or(false);
139
140 let checksum = CalculateChecksum(&content);
142
143 let encoding = DetectEncoding(&content);
145
146 let mime_type = DetectMimeType(file_path, &content);
148
149 let language = DetectLanguage(file_path);
151
152 let line_count = if mime_type.starts_with("text/") {
154 Some(content.iter().filter(|&&b| b == b'\n').count() as u32 + 1)
155 } else {
156 None
157 };
158
159 let symbols = if let Some(lang) = &language {
161 ExtractSymbols(file_path, &content, lang).await?
162 } else {
163 Vec::new()
164 };
165
166 let permissions = GetPermissionsString(&metadata);
167
168 let elapsed = start_time.elapsed();
169
170 dev_log!(
171 "indexing",
172 "indexed {} in {}ms ({} symbols)",
173 file_path.display(),
174 elapsed.as_millis(),
175 symbols.len()
176 );
177
178 Ok((
179 FileMetadata {
180 path:file_path.clone(),
181 size:file_size,
182 modified:modified_time,
183 mime_type,
184 language,
185 line_count,
186 checksum,
187 is_symlink,
188 permissions,
189 encoding,
190 indexed_at:chrono::Utc::now(),
191 symbol_count:symbols.len() as u32,
192 },
193 symbols,
194 ))
195}
196
197pub async fn ValidateFileAccess(file_path:&PathBuf) -> bool {
199 tokio::task::spawn_blocking({
200 let file_path = file_path.to_path_buf();
201 move || {
202 let can_access = std::fs::metadata(&file_path).is_ok();
204 if can_access {
205 std::fs::File::open(&file_path).is_ok()
207 } else {
208 false
209 }
210 }
211 })
212 .await
213 .unwrap_or(false)
214}
215
216pub fn CalculateChecksum(content:&[u8]) -> String {
218 use sha2::{Digest, Sha256};
223 let mut hasher = Sha256::new();
224 hasher.update(content);
225 hex::encode(hasher.finalize())
226}
227
/// Renders the owner/group/other permission bits of `metadata` as a
/// nine-character `rwxrwxrwx`-style string, using `-` for every unset bit.
#[cfg(unix)]
pub fn GetPermissionsString(metadata:&std::fs::Metadata) -> String {
	use std::os::unix::fs::PermissionsExt;

	// Mode-bit mask paired with the character shown when the bit is set,
	// in display order: owner, group, other.
	const FLAGS:[(u32, char); 9] = [
		(0o400, 'r'),
		(0o200, 'w'),
		(0o100, 'x'),
		(0o040, 'r'),
		(0o020, 'w'),
		(0o010, 'x'),
		(0o004, 'r'),
		(0o002, 'w'),
		(0o001, 'x'),
	];

	let bits = metadata.permissions().mode();

	FLAGS
		.iter()
		.map(|&(mask, symbol)| if bits & mask != 0 { symbol } else { '-' })
		.collect()
}
250
/// Placeholder permission string for platforms without Unix mode bits.
///
/// Always returns nine dashes, so the result has the same width as the
/// nine-character `rwxrwxrwx`-style string produced on Unix.
#[cfg(not(unix))]
pub fn GetPermissionsString(_metadata:&std::fs::Metadata) -> String { "---------".to_string() }
254
255pub async fn ScanFileMetadata(file_path:&PathBuf) -> Result<FileMetadata> {
257 let metadata = std::fs::metadata(file_path)
258 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
259
260 let modified = metadata
261 .modified()
262 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
263
264 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
265
266 Ok(FileMetadata {
267 path:file_path.clone(),
268 size:metadata.len(),
269 modified:modified_time,
270 mime_type:"application/octet-stream".to_string(),
271 language:None,
272 line_count:None,
273 checksum:String::new(),
274 is_symlink:metadata.file_type().is_symlink(),
275 permissions:GetPermissionsString(&metadata),
276 encoding:None,
277 indexed_at:chrono::Utc::now(),
278 symbol_count:0,
279 })
280}
281
282pub fn FileModifiedSince(file_path:&PathBuf, last_indexed:chrono::DateTime<chrono::Utc>) -> Result<bool> {
284 let metadata = std::fs::metadata(file_path)
285 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
286
287 let modified = metadata
288 .modified()
289 .map_err(|e| AirError::FileSystem(format!("Failed to get modification time: {}", e)))?;
290
291 let modified_time = chrono::DateTime::<chrono::Utc>::from(modified);
292
293 Ok(modified_time > last_indexed)
294}
295
296pub async fn GetFileSize(file_path:&PathBuf) -> Result<u64> {
298 tokio::task::spawn_blocking({
299 let file_path = file_path.to_path_buf();
300 move || {
301 let metadata = std::fs::metadata(&file_path)
302 .map_err(|e| AirError::FileSystem(format!("Failed to get file metadata: {}", e)))?;
303 Ok(metadata.len())
304 }
305 })
306 .await?
307}
308
309pub fn IsTextFile(metadata:&FileMetadata) -> bool {
311 metadata.mime_type.starts_with("text/")
312 || metadata.mime_type.contains("json")
313 || metadata.mime_type.contains("xml")
314 || metadata.mime_type.contains("yaml")
315 || metadata.mime_type.contains("toml")
316 || metadata.language.is_some()
317}
318
319pub fn IsBinaryFile(metadata:&FileMetadata) -> bool {
321 !IsTextFile(metadata)
322 || metadata.mime_type == "application/octet-stream"
323 || metadata.mime_type == "application/zip"
324 || metadata.mime_type == "application/x-tar"
325 || metadata.mime_type == "application/x-gzip"
326 || metadata.mime_type == "application/x-bzip2"
327}