|
8 | 8 | from collections import defaultdict |
9 | 9 | from datetime import datetime |
10 | 10 | from pathlib import Path |
11 | | -from typing import Dict, List |
| 11 | +from typing import Dict, List, Optional |
12 | 12 |
|
13 | 13 | from .dependency import NETWORKX_AVAILABLE, DependencyAnalyzer |
14 | 14 | from .intent import NLTK_AVAILABLE, SPACY_AVAILABLE |
@@ -45,9 +45,13 @@ class ProjectAnalyzer: |
45 | 45 | LANGUAGE_EXTENSIONS: Dict[str, str] = { |
46 | 46 | '.py': 'python', |
47 | 47 | '.js': 'javascript', |
| 48 | + '.mjs': 'javascript', |
| 49 | + '.cjs': 'javascript', |
48 | 50 | '.jsx': 'javascript', |
49 | 51 | '.ts': 'typescript', |
50 | 52 | '.tsx': 'typescript', |
| 53 | + '.mts': 'typescript', |
| 54 | + '.cts': 'typescript', |
51 | 55 | '.sql': 'sql', |
52 | 56 | '.java': 'java', |
53 | 57 | '.go': 'go', |
@@ -82,6 +86,17 @@ class ProjectAnalyzer: |
82 | 86 | 'Cargo.lock', 'pnpm-lock.yaml', |
83 | 87 | } |
84 | 88 |
|
@staticmethod
def _language_from_shebang(first_line: str) -> Optional[str]:
    """Infer a language name from a script's shebang line.

    Only the interpreter the shebang actually names is examined, not the
    whole line, so a directory or argument that merely contains the word
    "python" or "node" (``#!/home/node/bin/bash``) is not misclassified.

    Args:
        first_line: The first line of the file. ``None`` or an empty
            string is tolerated and treated as "no shebang".

    Returns:
        ``'python'``, ``'javascript'`` or ``'typescript'`` when the
        interpreter is recognized, otherwise ``None``.
    """
    line = (first_line or '').strip()
    if not line.startswith('#!'):
        return None

    # "#! /usr/bin/env python" (space after "#!") is legal, so split on
    # whitespace after dropping the marker instead of slicing a fixed prefix.
    tokens = line[2:].split()
    if not tokens:
        return None

    def _program(token: str) -> str:
        # Basename of the interpreter path, lowercased; accepts Windows
        # style separators and drops a trailing ".exe".
        name = token.replace('\\', '/').rsplit('/', 1)[-1].lower()
        return name[:-4] if name.endswith('.exe') else name

    interpreter = _program(tokens[0])
    if interpreter == 'env':
        # "env" is only a launcher: the real interpreter is the first
        # argument that is neither an option (-S, -i, ...) nor a
        # VAR=value assignment.
        interpreter = next(
            (
                _program(tok)
                for tok in tokens[1:]
                if not tok.startswith('-') and '=' not in tok
            ),
            '',
        )

    # Covers python, python3, python3.11, pythonw, ...
    if interpreter.startswith('python'):
        return 'python'
    if interpreter == 'ts-node':
        return 'typescript'
    # Strip a version suffix so "node18" / "nodejs20" are still recognized.
    if interpreter.rstrip('0123456789.') in ('node', 'nodejs'):
        return 'javascript'
    return None
| 99 | + |
85 | 100 | def __init__( |
86 | 101 | self, |
87 | 102 | root_path: str, |
@@ -177,12 +192,18 @@ def _scan_files(self): |
177 | 192 | if fp.name in self.IGNORE_FILES: |
178 | 193 | continue |
179 | 194 |
|
180 | | - # Check extension |
181 | 195 | ext = fp.suffix.lower() |
182 | | - if ext not in self.LANGUAGE_EXTENSIONS: |
| 196 | + language = self.LANGUAGE_EXTENSIONS.get(ext) |
| 197 | + if language is None and ext == '': |
| 198 | + try: |
| 199 | + with fp.open('r', encoding='utf-8', errors='ignore') as f: |
| 200 | + language = self._language_from_shebang(f.readline()) |
| 201 | + except Exception: |
| 202 | + language = None |
| 203 | + |
| 204 | + if language is None: |
183 | 205 | continue |
184 | 206 |
|
185 | | - language = self.LANGUAGE_EXTENSIONS[ext] |
186 | 207 | self.languages[language] += 1 |
187 | 208 |
|
188 | 209 | # Read file |
|
0 commit comments