DeL-TaiseiOzaki committed on
Commit
cefab8e
·
verified ·
1 Parent(s): 6ee54ae

Update core/file_scanner.py

Browse files
Files changed (1) hide show
  1. core/file_scanner.py +4 -39
core/file_scanner.py CHANGED
@@ -21,10 +21,9 @@ class FileInfo:
21
  return f"{self.size/(1024*1024):.1f} MB"
22
 
23
  class FileScanner:
24
- # スキャン対象から除外するディレクトリ
25
  EXCLUDED_DIRS = {
26
- '.git', '__pycache__', 'node_modules', 'venv', '.env',
27
- 'build', 'dist', 'target', 'bin', 'obj'
28
  }
29
 
30
  def __init__(self, base_dir: Path, target_extensions: Set[str]):
@@ -41,39 +40,5 @@ class FileScanner:
41
  with file_path.open('rb') as f:
42
  raw_data = f.read(4096)
43
  result = chardet.detect(raw_data)
44
-
45
- encoding = result['encoding'] if result['confidence'] > 0.7 else 'utf-8'
46
-
47
- try:
48
- with file_path.open('r', encoding=encoding) as f:
49
- return f.read(), encoding
50
- except UnicodeDecodeError:
51
- try:
52
- with file_path.open('r', encoding='cp932') as f:
53
- return f.read(), 'cp932'
54
- except UnicodeDecodeError:
55
- return None, None
56
-
57
- except (OSError, ValueError):
58
- return None, None
59
-
60
- def scan_files(self) -> List[FileInfo]:
61
- if not self.base_dir.exists():
62
- raise FileNotFoundError(f"ディレクトリが見つかりません: {self.base_dir}")
63
-
64
- files = []
65
-
66
- for entry in self.base_dir.glob("**/*"):
67
- if entry.is_file() and self._should_scan_file(entry):
68
- content, encoding = self._read_file_content(entry)
69
-
70
- if content is not None:
71
- files.append(FileInfo(
72
- path=entry.absolute(),
73
- size=entry.stat().st_size,
74
- extension=entry.suffix.lower(),
75
- content=content,
76
- encoding=encoding
77
- ))
78
-
79
- return sorted(files, key=lambda x: str(x.path))
 
21
  return f"{self.size/(1024*1024):.1f} MB"
22
 
23
  class FileScanner:
 
24
  EXCLUDED_DIRS = {
25
+ '.git', '__pycache__', 'node_modules', 'venv',
26
+ '.env', 'build', 'dist', 'target', 'bin', 'obj'
27
  }
28
 
29
  def __init__(self, base_dir: Path, target_extensions: Set[str]):
 
40
  with file_path.open('rb') as f:
41
  raw_data = f.read(4096)
42
  result = chardet.detect(raw_data)
43
+ encoding = result['encoding'] if result['confidence'] > 0.7 else 'utf-8'
44
+