Skip to content

Commit 7544c24

Browse files
refactoring
1 parent af85d45 commit 7544c24

2 files changed

Lines changed: 46 additions & 715 deletions

File tree

examples/06_metrics.py

Lines changed: 46 additions & 243 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,15 @@
22
"""
33
Metrics Analysis Example - Detailed reproduction quality analysis.
44
5+
Uses the standardized ReproductionMetrics API.
6+
57
Usage:
68
python 06_metrics.py tests/samples/sample_dataclasses.py
7-
python 06_metrics.py code2logic/models.py --compare-formats
8-
python 06_metrics.py tests/samples/ --batch
9+
python 06_metrics.py tests/samples/sample_class.py --verbose
910
"""
1011

1112
import sys
1213
import argparse
13-
import json
1414
from pathlib import Path
1515

1616
sys.path.insert(0, str(Path(__file__).parent.parent))
@@ -23,288 +23,91 @@
2323

2424
from code2logic import (
2525
ReproductionMetrics,
26-
analyze_reproduction,
27-
compare_formats,
28-
generate_file_gherkin,
29-
generate_file_yaml,
30-
generate_file_json,
31-
reproduce_file,
3226
get_client,
27+
generate_file_gherkin,
3328
)
3429
from code2logic.reproduction import extract_code_block
3530

3631

37-
def generate_from_template(spec: str) -> str:
38-
"""Simple template-based code generation as fallback."""
39-
# Extract class names from spec
40-
import re
41-
42-
# Look for actual class names in the spec
43-
classes = []
44-
45-
# Pattern 1: Look for class declarations in examples
46-
class_matches = re.findall(r'class (\w+)', spec, re.MULTILINE)
47-
classes.extend(class_matches)
48-
49-
# Pattern 2: Look for dataclass scenarios
50-
dataclass_matches = re.findall(r'Scenario: (\w+)', spec, re.MULTILINE)
51-
classes.extend(dataclass_matches)
52-
53-
# Pattern 3: Look for feature names (often class names)
54-
feature_matches = re.findall(r'Feature: (\w+)', spec, re.MULTILINE)
55-
classes.extend(feature_matches)
56-
57-
# Filter valid class names
58-
valid_classes = []
59-
for c in classes:
60-
if (c.isidentifier()
61-
and c not in ['Given', 'When', 'Then', 'And', 'Background', 'Scenario', 'Feature', 'Core']
62-
and not c.lower() in ['test', 'example', 'sample', 'dataclass', 'define']
63-
and len(c) > 1): # Skip single letters
64-
valid_classes.append(c)
65-
66-
# Remove duplicates and limit
67-
valid_classes = list(set(valid_classes))[:5]
68-
69-
# Generate code
70-
code = '''from dataclasses import dataclass, field
71-
from typing import Optional, List, Dict
72-
from datetime import datetime
73-
74-
'''
75-
76-
if valid_classes:
77-
for class_name in valid_classes:
78-
code += f'''@dataclass
79-
class {class_name}:
80-
"""Generated from specification."""
81-
# TODO: Add fields based on original code
82-
83-
'''
84-
else:
85-
# Fallback: generate a generic class
86-
code += '''@dataclass
87-
class GeneratedClass:
88-
"""Generated class from specification."""
89-
name: str = ""
90-
description: str = ""
91-
92-
'''
93-
94-
return code
95-
96-
97-
def analyze_single(source_path: str, verbose: bool = False, no_llm: bool = False):
32+
def analyze_file(source_path: str, verbose: bool = False, no_llm: bool = False):
    """Analyze a single file with detailed reproduction metrics.

    Generates a Gherkin spec for *source_path*, reproduces code from it
    (via the configured LLM client, or the template fallback when the LLM
    is unavailable or disabled), scores the reproduction with
    ``ReproductionMetrics``, and prints a human-readable summary.

    Args:
        source_path: Path to the Python source file to analyze.
        verbose: Passed through to ``ReproductionMetrics`` for extra output.
        no_llm: Skip the LLM entirely and use the template generator.

    Returns:
        The ``ReproductionMetrics.analyze`` result object.
    """
    path = Path(source_path)
    # Read as UTF-8 explicitly: the platform default (e.g. cp1252 on
    # Windows) can fail on source files containing non-ASCII characters.
    original = path.read_text(encoding='utf-8')

    print(f"\n{'='*60}")
    print(f"METRICS ANALYSIS: {source_path}")
    print(f"{'='*60}")

    # Generate spec (rough token estimate: ~4 chars per token)
    spec = generate_file_gherkin(source_path)
    print(f"Spec size: {len(spec)} chars ({len(spec)//4} tokens)")

    # Reproduce code from the spec
    if no_llm:
        generated = _template_generate(spec)
        print("Using template generation (--no-llm)")
    else:
        try:
            client = get_client()
            prompt = f"Generate Python code from this Gherkin spec:\n\n{spec}\n\nOutput only code."
            response = client.generate(prompt, max_tokens=4000)
            generated = extract_code_block(response)
            print(f"Generated: {len(generated)} chars")
        except Exception as e:
            # Best-effort fallback keeps the example usable without an LLM.
            print(f"LLM failed: {e}, using template")
            generated = _template_generate(spec)

    # Score the reproduction against the original source
    metrics = ReproductionMetrics(verbose=verbose)
    result = metrics.analyze(original, generated, spec, format_name='gherkin', source_file=source_path)

    # Print results (plain strings here: no placeholders, so no f-string)
    print(f"\n📊 Overall: {result.overall_score:.1f}% ({result.quality_grade})")

    print("\n📝 Text Metrics:")
    print(f"  Cosine Similarity: {result.text.cosine_similarity:.1f}%")
    print(f"  Jaccard Similarity: {result.text.jaccard_similarity:.1f}%")

    print("\n🏗️ Structural:")
    print(f"  Classes: {result.structural.classes_original} → {result.structural.classes_generated}")
    print(f"  Functions: {result.structural.functions_original} → {result.structural.functions_generated}")
    print(f"  Score: {result.structural.structural_score:.1f}%")

    print("\n🎯 Semantic:")
    print(f"  Naming: {result.semantic.naming_similarity:.1f}%")
    print(f"  Intent: {result.semantic.intent_score:.1f}%")

    print("\n📦 Efficiency:")
    print(f"  Compression: {result.format.compression_ratio:.2f}x")

    if result.recommendations:
        print("\n💡 Recommendations:")
        for rec in result.recommendations[:3]:  # cap console output at top three
            print(f"  • {rec}")

    return result
17489

17590

176-
def compare_all_formats(source_path: str, no_llm: bool = False):
177-
"""Compare reproduction across all formats."""
178-
path = Path(source_path)
179-
original = path.read_text()
180-
181-
print(f"\nComparing formats for: {source_path}")
182-
print("="*60)
183-
184-
# Generate specs
185-
formats = {}
186-
for fmt, generator in [
187-
('gherkin', generate_file_gherkin),
188-
('yaml', generate_file_yaml),
189-
('json', generate_file_json),
190-
]:
191-
print(f" Generating {fmt} spec...", end=" ", flush=True)
192-
spec = generator(source_path)
193-
formats[fmt] = spec
194-
print(f"✓ ({len(spec)} chars)")
195-
196-
results = {}
197-
198-
for fmt, spec in formats.items():
199-
print(f"\n Testing {fmt}...", end=" ", flush=True)
200-
201-
if no_llm:
202-
generated = generate_from_template(spec)
203-
results[fmt] = (spec, generated)
204-
print(f"✓ (template, {len(generated)} chars)")
205-
else:
206-
try:
207-
client = get_client()
208-
prompt = f"""Generate Python code from this {fmt} specification:
209-
210-
{spec[:4000]}
211-
212-
Generate complete, working Python code."""
213-
214-
response = client.generate(prompt, max_tokens=4000)
215-
generated = extract_code_block(response)
216-
results[fmt] = (spec, generated)
217-
print(f"✓ ({len(generated)} chars)")
218-
except Exception as e:
219-
print(f"✗ ({e})")
220-
# Fallback to template
221-
generated = generate_from_template(spec)
222-
results[fmt] = (spec, generated)
223-
224-
# Compare
225-
comparison = compare_formats(original, results)
226-
227-
# Print comparison
228-
print(f"\n📊 Format Comparison:")
229-
print("-"*60)
230-
print(f"{'Format':<12} {'Overall':>10} {'Grade':>6} {'Text':>10} {'Struct':>10} {'Semantic':>10}")
231-
print("-"*60)
232-
233-
for fmt, summary in comparison['summary'].items():
234-
print(f"{fmt:<12} {summary['overall']:>9.1f}% {summary['grade']:>6} "
235-
f"{summary['text']:>9.1f}% {summary['structural']:>9.1f}% {summary['semantic']:>9.1f}%")
236-
237-
print("-"*60)
238-
print(f"\n🏆 Best Format by Category:")
239-
for category, fmt in comparison['best'].items():
240-
print(f" {category}: {fmt}")
241-
242-
return comparison
243-
244-
245-
def batch_analyze(project_path: str, no_llm: bool = False):
246-
"""Analyze all Python files in a directory."""
247-
path = Path(project_path)
248-
files = list(path.glob('*.py'))
249-
250-
print(f"\nBatch Analysis: {project_path}")
251-
print(f"Found {len(files)} Python files")
252-
print("="*60)
253-
254-
all_results = []
255-
256-
for file_path in files[:5]: # Limit to 5 for demo
257-
try:
258-
result = analyze_single(str(file_path), verbose=False, no_llm=no_llm)
259-
all_results.append({
260-
'file': file_path.name,
261-
'score': result.overall_score,
262-
'grade': result.quality_grade,
263-
})
264-
except Exception as e:
265-
print(f"Error analyzing {file_path.name}: {e}")
91+
def _template_generate(spec: str) -> str:
    """Fallback code generator: emit stub dataclasses for class names in a spec.

    Scans *spec* for ``class <Name>`` mentions, drops Gherkin keywords and
    invalid identifiers, and returns a module string with up to three
    ``@dataclass`` stubs.

    Args:
        spec: Specification text (e.g. a generated Gherkin spec).

    Returns:
        Python source code as a string (always includes the import header,
        even when no classes are found).
    """
    import re

    # Dedupe while preserving first-occurrence order: list(set(...)) made
    # the output nondeterministic across runs (string hash randomization).
    candidates = list(dict.fromkeys(re.findall(r'class (\w+)', spec)))

    # Filter BEFORE applying the limit of 3, so Gherkin keyword matches
    # don't consume slots that valid class names could fill.
    classes = [c for c in candidates
               if c.isidentifier() and c not in ('Given', 'When', 'Then')][:3]

    code = "from dataclasses import dataclass\nfrom typing import Optional, List\n\n"
    for cls in classes:
        # 4-space indent so the generated stub is conventionally formatted.
        code += f"@dataclass\nclass {cls}:\n    pass\n\n"
    return code
277101

278102

279103
def main():
    """Command-line entry point: parse arguments and run the analysis."""
    arg_parser = argparse.ArgumentParser(description='Reproduction metrics analysis')
    arg_parser.add_argument('source', nargs='?', default='tests/samples/sample_dataclasses.py')
    arg_parser.add_argument('--verbose', '-v', action='store_true')
    arg_parser.add_argument('--no-llm', action='store_true')

    opts = arg_parser.parse_args()
    analyze_file(opts.source, opts.verbose, opts.no_llm)
308111

309112

310113
if __name__ == '__main__':

0 commit comments

Comments
 (0)