Spaces:
Sleeping
Sleeping
Update core/file_scanner.py
Browse files- core/file_scanner.py +4 -39
core/file_scanner.py
CHANGED
@@ -21,10 +21,9 @@ class FileInfo:
|
|
21 |
return f"{self.size/(1024*1024):.1f} MB"
|
22 |
|
23 |
class FileScanner:
|
24 |
-
# スキャン対象から除外するディレクトリ
|
25 |
EXCLUDED_DIRS = {
|
26 |
-
'.git', '__pycache__', 'node_modules', 'venv',
|
27 |
-
'build', 'dist', 'target', 'bin', 'obj'
|
28 |
}
|
29 |
|
30 |
def __init__(self, base_dir: Path, target_extensions: Set[str]):
|
@@ -41,39 +40,5 @@ class FileScanner:
|
|
41 |
with file_path.open('rb') as f:
|
42 |
raw_data = f.read(4096)
|
43 |
result = chardet.detect(raw_data)
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
try:
|
48 |
-
with file_path.open('r', encoding=encoding) as f:
|
49 |
-
return f.read(), encoding
|
50 |
-
except UnicodeDecodeError:
|
51 |
-
try:
|
52 |
-
with file_path.open('r', encoding='cp932') as f:
|
53 |
-
return f.read(), 'cp932'
|
54 |
-
except UnicodeDecodeError:
|
55 |
-
return None, None
|
56 |
-
|
57 |
-
except (OSError, ValueError):
|
58 |
-
return None, None
|
59 |
-
|
60 |
-
def scan_files(self) -> List[FileInfo]:
|
61 |
-
if not self.base_dir.exists():
|
62 |
-
raise FileNotFoundError(f"ディレクトリが見つかりません: {self.base_dir}")
|
63 |
-
|
64 |
-
files = []
|
65 |
-
|
66 |
-
for entry in self.base_dir.glob("**/*"):
|
67 |
-
if entry.is_file() and self._should_scan_file(entry):
|
68 |
-
content, encoding = self._read_file_content(entry)
|
69 |
-
|
70 |
-
if content is not None:
|
71 |
-
files.append(FileInfo(
|
72 |
-
path=entry.absolute(),
|
73 |
-
size=entry.stat().st_size,
|
74 |
-
extension=entry.suffix.lower(),
|
75 |
-
content=content,
|
76 |
-
encoding=encoding
|
77 |
-
))
|
78 |
-
|
79 |
-
return sorted(files, key=lambda x: str(x.path))
|
|
|
21 |
return f"{self.size/(1024*1024):.1f} MB"
|
22 |
|
23 |
class FileScanner:
|
|
|
24 |
EXCLUDED_DIRS = {
|
25 |
+
'.git', '__pycache__', 'node_modules', 'venv',
|
26 |
+
'.env', 'build', 'dist', 'target', 'bin', 'obj'
|
27 |
}
|
28 |
|
29 |
def __init__(self, base_dir: Path, target_extensions: Set[str]):
|
|
|
40 |
with file_path.open('rb') as f:
|
41 |
raw_data = f.read(4096)
|
42 |
result = chardet.detect(raw_data)
|
43 |
+
encoding = result['encoding'] if result['confidence'] > 0.7 else 'utf-8'
|
44 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|