Skip to content

Commit e17fce2

Browse files
refactoring
1 parent d9c2691 commit e17fce2

File tree

7 files changed

+619
-137
lines changed

7 files changed

+619
-137
lines changed

code2logic/analyzer.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def analyze(self) -> ProjectInfo:
157157
similar_functions=similar,
158158
total_files=len(self.modules),
159159
total_lines=sum(m.lines_total for m in self.modules),
160+
total_bytes=sum(getattr(m, 'file_bytes', 0) for m in self.modules),
160161
generated_at=datetime.now().isoformat()
161162
)
162163

@@ -208,6 +209,10 @@ def _scan_files(self):
208209
continue
209210

210211
if module:
212+
try:
213+
module.file_bytes = fp.stat().st_size
214+
except Exception:
215+
module.file_bytes = len(content.encode('utf-8', errors='ignore'))
211216
self.modules.append(module)
212217

213218
def _detect_entrypoints(self) -> List[str]:

code2logic/generators.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,18 @@
1414

1515
from .models import (
1616
ProjectInfo, ModuleInfo, ClassInfo, FunctionInfo,
17-
DependencyNode, ConstantInfo
17+
DependencyNode, ConstantInfo, FieldInfo
1818
)
1919
from .shared_utils import categorize_function, extract_domain, compute_hash, remove_self_from_params, compact_imports, deduplicate_imports
2020

2121

22+
def bytes_to_kb(bytes_value: int) -> float:
    """Express a byte count in kilobytes (1 KB = 1024 bytes).

    Args:
        bytes_value: Size in bytes. Any falsy value (``0``, ``None``)
            is reported as ``0.0``.

    Returns:
        The size in kilobytes, rounded to one decimal place.
    """
    # Falsy input short-circuits so callers may pass a missing size safely.
    return round(bytes_value / 1024, 1) if bytes_value else 0.0
27+
28+
2229
class MarkdownGenerator:
2330
"""
2431
Generates Markdown output for project analysis.
@@ -410,6 +417,17 @@ def ser_func(f: FunctionInfo) -> dict:
410417
data['lines'] = f.lines
411418
data['is_private'] = f.is_private
412419
return data
420+
421+
def _field_to_dict(self, field: FieldInfo) -> dict:
422+
"""Serialize dataclass FieldInfo to dictionary."""
423+
data = {'name': field.name}
424+
if getattr(field, 'type_annotation', None):
425+
data['type'] = field.type_annotation
426+
if getattr(field, 'default', None):
427+
data['default'] = field.default
428+
if getattr(field, 'default_factory', None):
429+
data['factory'] = field.default_factory
430+
return data
413431

414432
def ser_class(c: ClassInfo) -> dict:
415433
data = {
@@ -1300,6 +1318,9 @@ def _build_flat_data(self, project: ProjectInfo, detail: str) -> dict:
13001318
def _build_nested_data(self, project: ProjectInfo, detail: str) -> dict:
13011319
"""Build nested hierarchical data structure."""
13021320
modules = []
1321+
module_overview = []
1322+
for m in project.modules:
1323+
module_overview.append(f"{m.path}:{m.lines_code}")
13031324
for m in project.modules:
13041325
module_data = {
13051326
'path': m.path,
@@ -1313,28 +1334,49 @@ def _build_nested_data(self, project: ProjectInfo, detail: str) -> dict:
13131334

13141335
if m.classes:
13151336
module_data['classes'] = []
1337+
dataclass_summary = []
13161338
for c in m.classes:
13171339
cls_data = {
13181340
'name': c.name,
13191341
'bases': c.bases,
13201342
'docstring': c.docstring[:80] if c.docstring else '',
13211343
}
1322-
# Include properties (critical for dataclass reproduction)
1323-
if c.properties:
1344+
if getattr(c, 'properties', None):
13241345
cls_data['properties'] = c.properties[:20]
1346+
# Include properties (critical for dataclass reproduction)
1347+
if getattr(c, 'is_dataclass', False):
1348+
cls_data['dataclass'] = True
1349+
if getattr(c, 'fields', None):
1350+
cls_data['fields'] = [
1351+
self._field_to_dict(field)
1352+
for field in c.fields[:15]
1353+
]
1354+
dataclass_summary.append({
1355+
'name': c.name,
1356+
'fields': [
1357+
self._field_to_dict(field)
1358+
for field in c.fields[:15]
1359+
] if getattr(c, 'fields', None) else []
1360+
})
13251361
if c.methods:
13261362
cls_data['methods'] = [
13271363
self._method_to_dict(method, detail)
13281364
for method in c.methods[:15]
13291365
]
13301366
module_data['classes'].append(cls_data)
1367+
if dataclass_summary:
1368+
module_data['dataclasses'] = dataclass_summary[:10]
13311369

13321370
if m.functions:
13331371
module_data['functions'] = [
13341372
self._function_to_dict(f, detail)
13351373
for f in m.functions[:20]
13361374
]
13371375

1376+
constant_entries = self._constants_for_module_verbose(m, limit=12)
1377+
if constant_entries:
1378+
module_data['constants'] = constant_entries
1379+
13381380
modules.append(module_data)
13391381

13401382
return {
@@ -1550,7 +1592,9 @@ def _build_compact_data(self, project: ProjectInfo, detail: str) -> dict:
15501592
default_lang = max(project.languages.items(), key=lambda x: x[1])[0] if project.languages else 'python'
15511593

15521594
modules = []
1595+
module_overview = []
15531596
for m in project.modules:
1597+
module_overview.append(f"{m.path}:{m.lines_code}")
15541598
mod_data = {
15551599
'p': m.path, # path
15561600
}
@@ -1586,24 +1630,16 @@ def _build_compact_data(self, project: ProjectInfo, detail: str) -> dict:
15861630
modules.append(mod_data)
15871631

15881632
return {
1633+
'header': {
1634+
'project': project.name,
1635+
'files': project.total_files,
1636+
'lines': project.total_lines,
1637+
'languages': project.languages,
1638+
'modules_count': len(project.modules),
1639+
},
1640+
'M': module_overview,
15891641
'meta': {
1590-
'legend': {
1591-
'p': 'path',
1592-
'l': 'lines',
1593-
'i': 'imports',
1594-
'e': 'exports',
1595-
'c': 'classes',
1596-
'f': 'functions',
1597-
'n': 'name',
1598-
'd': 'docstring',
1599-
'b': 'bases',
1600-
'm': 'methods',
1601-
'props': 'properties',
1602-
'sig': 'signature (without self)',
1603-
'ret': 'return_type',
1604-
'async': 'is_async',
1605-
'lang': 'language',
1606-
}
1642+
'legend': self.KEY_LEGEND.copy()
16071643
},
16081644
'defaults': {'lang': default_lang},
16091645
'modules': modules

code2logic/models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,8 @@ class ProjectInfo:
165165
similar_functions: Dict[str, List[str]]
166166
total_files: int
167167
total_lines: int
168-
generated_at: str
168+
total_bytes: int = 0
169+
generated_at: str = ""
169170

170171

171172
# Backwards compatibility aliases for tests
@@ -317,6 +318,7 @@ class ModuleInfo:
317318
docstring: Module docstring
318319
lines_total: Total line count
319320
lines_code: Lines of actual code (excluding comments/blanks)
321+
file_bytes: Size of the source file in bytes
320322
"""
321323
path: str
322324
language: str = "python"
@@ -332,3 +334,4 @@ class ModuleInfo:
332334
docstring: Optional[str] = None
333335
lines_total: int = 0
334336
lines_code: int = 0
337+
file_bytes: int = 0

code2logic/parsers.py

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,16 @@ def _combine_import_name(module_name: str, identifier: str) -> str:
5858
return f"{module_name}.{identifier}"
5959

6060

61+
def _truncate_constant_value(value_text: str, limit: int = 400) -> str:
62+
"""Return a trimmed single-line snippet for constant values."""
63+
if not value_text:
64+
return ''
65+
snippet = value_text.replace('\n', ' ').strip()
66+
if len(snippet) > limit:
67+
snippet = snippet[: limit - 3].rstrip() + '...'
68+
return snippet
69+
70+
6171
class TreeSitterParser:
6272
"""
6373
Parser using Tree-sitter for high-accuracy AST parsing.
@@ -206,6 +216,7 @@ def _parse_python(self, filepath: str, content: str, tree) -> ModuleInfo:
206216

207217
# Deduplicate imports and normalize names at extraction time
208218
lines = content.split('\n')
219+
file_bytes = len(content.encode('utf-8', errors='ignore'))
209220

210221
# Extract TYPE_CHECKING and aliases from the entire tree
211222
type_checking_imports = self._extract_type_checking_imports(root, content)
@@ -230,7 +241,8 @@ def _parse_python(self, filepath: str, content: str, tree) -> ModuleInfo:
230241
aliases=aliases,
231242
docstring=self._truncate_docstring(docstring),
232243
lines_total=len(lines),
233-
lines_code=len([l for l in lines if l.strip() and not l.strip().startswith('#')])
244+
lines_code=len([l for l in lines if l.strip() and not l.strip().startswith('#')]),
245+
file_bytes=file_bytes,
234246
)
235247

236248
def _extract_constants(self, tree, content: str) -> List[ConstantInfo]:
@@ -676,7 +688,7 @@ def _extract_py_constant(self, node, content: str) -> Optional[ConstantInfo]:
676688
# Get the value
677689
if right:
678690
value_text = self._text(right, content).strip()
679-
const.value = value_text if len(value_text) <= 200 else None
691+
const.value = _truncate_constant_value(value_text)
680692

681693
# For dictionaries, extract keys
682694
if value_text.startswith('{') and value_text.endswith('}'):
@@ -1116,9 +1128,9 @@ def _parse_python(self, filepath: str, content: str) -> Optional[ModuleInfo]:
11161128
if func:
11171129
functions.append(func)
11181130
elif isinstance(node, ast.Assign):
1119-
for t in node.targets:
1120-
if isinstance(t, ast.Name) and t.id.isupper():
1121-
constants.append(t.id)
1131+
const = self._extract_ast_constant(node, content)
1132+
if const:
1133+
constants.append(const)
11221134

11231135
exports = [c.name for c in classes if not c.name.startswith('_')]
11241136
exports += [f.name for f in functions if not f.name.startswith('_')]
@@ -1248,6 +1260,48 @@ def _extract_ast_class(self, node: ast.ClassDef) -> ClassInfo:
12481260
is_abstract='ABC' in bases or is_dataclass,
12491261
generic_params=[]
12501262
)
1263+
1264+
def _extract_ast_constant(self, node: ast.Assign, content: str) -> Optional[ConstantInfo]:
    """Extract ConstantInfo from an AST assignment node if applicable.

    An assignment qualifies when one of its targets is a plain name written
    entirely in upper case. Every target is examined, so the constant in a
    chained assignment such as ``a = B = 1`` is still found; the first
    qualifying target provides the name.

    Args:
        node: The assignment node to inspect.
        content: Full source text, passed on to ``_format_ast_value``.

    Returns:
        A ConstantInfo with ``value`` set when a textual rendering of the
        right-hand side is available and, for dict literals, ``value_keys``
        set to at most the first 10 string keys; ``None`` when no target
        qualifies.
    """
    target = next(
        (
            candidate
            for candidate in node.targets
            if isinstance(candidate, ast.Name) and candidate.id.isupper()
        ),
        None,
    )
    if target is None:
        return None

    const = ConstantInfo(name=target.id)
    value_text = self._format_ast_value(node.value, content)
    if value_text:
        const.value = _truncate_constant_value(value_text)

    if isinstance(node.value, ast.Dict):
        # Non-string keys and ``**spread`` entries (key is None) are skipped.
        keys = [
            key.value
            for key in node.value.keys
            if isinstance(key, ast.Constant) and isinstance(key.value, str)
        ]
        if keys:
            const.value_keys = keys[:10]
    return const
1285+
1286+
def _format_ast_value(self, value_node: ast.AST, content: str) -> str:
1287+
"""Best-effort string representation of an AST value node."""
1288+
if value_node is None:
1289+
return ''
1290+
try:
1291+
import ast
1292+
if hasattr(ast, "unparse"):
1293+
return ast.unparse(value_node)
1294+
except Exception:
1295+
pass
1296+
try:
1297+
segment = ast.get_source_segment(content, value_node)
1298+
if segment:
1299+
return segment
1300+
except Exception:
1301+
pass
1302+
if isinstance(value_node, ast.Constant):
1303+
return repr(value_node.value)
1304+
return ''
12511305

12521306
def _ann_str(self, node) -> str:
12531307
"""Convert AST annotation to string."""

0 commit comments

Comments
 (0)