|
2 | 2 | """ |
3 | 3 | Metrics Analysis Example - Detailed reproduction quality analysis. |
4 | 4 |
|
| 5 | +Uses the standardized ReproductionMetrics API. |
| 6 | +
|
5 | 7 | Usage: |
6 | 8 | python 06_metrics.py tests/samples/sample_dataclasses.py |
7 | | - python 06_metrics.py code2logic/models.py --compare-formats |
8 | | - python 06_metrics.py tests/samples/ --batch |
| 9 | + python 06_metrics.py tests/samples/sample_class.py --verbose |
9 | 10 | """ |
10 | 11 |
|
11 | 12 | import sys |
12 | 13 | import argparse |
13 | | -import json |
14 | 14 | from pathlib import Path |
15 | 15 |
|
16 | 16 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
|
23 | 23 |
|
24 | 24 | from code2logic import ( |
25 | 25 | ReproductionMetrics, |
26 | | - analyze_reproduction, |
27 | | - compare_formats, |
28 | | - generate_file_gherkin, |
29 | | - generate_file_yaml, |
30 | | - generate_file_json, |
31 | | - reproduce_file, |
32 | 26 | get_client, |
| 27 | + generate_file_gherkin, |
33 | 28 | ) |
34 | 29 | from code2logic.reproduction import extract_code_block |
35 | 30 |
|
36 | 31 |
|
37 | | -def generate_from_template(spec: str) -> str: |
38 | | - """Simple template-based code generation as fallback.""" |
39 | | - # Extract class names from spec |
40 | | - import re |
41 | | - |
42 | | - # Look for actual class names in the spec |
43 | | - classes = [] |
44 | | - |
45 | | - # Pattern 1: Look for class declarations in examples |
46 | | - class_matches = re.findall(r'class (\w+)', spec, re.MULTILINE) |
47 | | - classes.extend(class_matches) |
48 | | - |
49 | | - # Pattern 2: Look for dataclass scenarios |
50 | | - dataclass_matches = re.findall(r'Scenario: (\w+)', spec, re.MULTILINE) |
51 | | - classes.extend(dataclass_matches) |
52 | | - |
53 | | - # Pattern 3: Look for feature names (often class names) |
54 | | - feature_matches = re.findall(r'Feature: (\w+)', spec, re.MULTILINE) |
55 | | - classes.extend(feature_matches) |
56 | | - |
57 | | - # Filter valid class names |
58 | | - valid_classes = [] |
59 | | - for c in classes: |
60 | | - if (c.isidentifier() |
61 | | - and c not in ['Given', 'When', 'Then', 'And', 'Background', 'Scenario', 'Feature', 'Core'] |
62 | | - and not c.lower() in ['test', 'example', 'sample', 'dataclass', 'define'] |
63 | | - and len(c) > 1): # Skip single letters |
64 | | - valid_classes.append(c) |
65 | | - |
66 | | - # Remove duplicates and limit |
67 | | - valid_classes = list(set(valid_classes))[:5] |
68 | | - |
69 | | - # Generate code |
70 | | - code = '''from dataclasses import dataclass, field |
71 | | -from typing import Optional, List, Dict |
72 | | -from datetime import datetime |
73 | | -
|
74 | | -''' |
75 | | - |
76 | | - if valid_classes: |
77 | | - for class_name in valid_classes: |
78 | | - code += f'''@dataclass |
79 | | -class {class_name}: |
80 | | - """Generated from specification.""" |
81 | | - # TODO: Add fields based on original code |
82 | | - |
83 | | -''' |
84 | | - else: |
85 | | - # Fallback: generate a generic class |
86 | | - code += '''@dataclass |
87 | | -class GeneratedClass: |
88 | | - """Generated class from specification.""" |
89 | | - name: str = "" |
90 | | - description: str = "" |
91 | | - |
92 | | -''' |
93 | | - |
94 | | - return code |
95 | | - |
96 | | - |
def analyze_file(source_path: str, verbose: bool = False, no_llm: bool = False):
    """Analyze a single Python file's reproduction quality with detailed metrics.

    Generates a Gherkin spec from the file, reproduces code from that spec
    (via LLM, or a template fallback), then scores the reproduction with
    ReproductionMetrics and prints a summary report.

    Args:
        source_path: Path to the Python source file to analyze.
        verbose: Pass-through to ReproductionMetrics for extra detail.
        no_llm: Skip LLM generation entirely and use the template fallback.

    Returns:
        The ReproductionMetrics analysis result object.
    """
    path = Path(source_path)
    # Read explicitly as UTF-8: the platform default (e.g. cp1252 on Windows)
    # can raise UnicodeDecodeError on perfectly valid source files.
    original = path.read_text(encoding='utf-8')

    print(f"\n{'='*60}")
    print(f"METRICS ANALYSIS: {source_path}")
    print(f"{'='*60}")

    # Generate spec (token count is a rough chars/4 heuristic).
    spec = generate_file_gherkin(source_path)
    print(f"Spec size: {len(spec)} chars ({len(spec)//4} tokens)")

    # Reproduce code from the spec.
    if no_llm:
        generated = _template_generate(spec)
        print("Using template generation (--no-llm)")
    else:
        try:
            client = get_client()
            prompt = f"Generate Python code from this Gherkin spec:\n\n{spec}\n\nOutput only code."
            response = client.generate(prompt, max_tokens=4000)
            generated = extract_code_block(response)
            print(f"Generated: {len(generated)} chars")
        except Exception as e:
            # Deliberate best-effort fallback: any LLM/client failure
            # (network, auth, missing config) degrades to templates.
            print(f"LLM failed: {e}, using template")
            generated = _template_generate(spec)

    # Score the reproduction against the original.
    metrics = ReproductionMetrics(verbose=verbose)
    result = metrics.analyze(original, generated, spec, format_name='gherkin', source_file=source_path)

    # Print results
    print(f"\n📊 Overall: {result.overall_score:.1f}% ({result.quality_grade})")

    print(f"\n📝 Text Metrics:")
    print(f"  Cosine Similarity: {result.text.cosine_similarity:.1f}%")
    print(f"  Jaccard Similarity: {result.text.jaccard_similarity:.1f}%")

    print(f"\n🏗️ Structural:")
    print(f"  Classes: {result.structural.classes_original} → {result.structural.classes_generated}")
    print(f"  Functions: {result.structural.functions_original} → {result.structural.functions_generated}")
    print(f"  Score: {result.structural.structural_score:.1f}%")

    print(f"\n🎯 Semantic:")
    print(f"  Naming: {result.semantic.naming_similarity:.1f}%")
    print(f"  Intent: {result.semantic.intent_score:.1f}%")

    print(f"\n📦 Efficiency:")
    print(f"  Compression: {result.format.compression_ratio:.2f}x")

    if result.recommendations:
        print(f"\n💡 Recommendations:")
        # Cap at three so the console summary stays short.
        for rec in result.recommendations[:3]:
            print(f"  • {rec}")

    return result
|
175 | 90 |
|
176 | | -def compare_all_formats(source_path: str, no_llm: bool = False): |
177 | | - """Compare reproduction across all formats.""" |
178 | | - path = Path(source_path) |
179 | | - original = path.read_text() |
180 | | - |
181 | | - print(f"\nComparing formats for: {source_path}") |
182 | | - print("="*60) |
183 | | - |
184 | | - # Generate specs |
185 | | - formats = {} |
186 | | - for fmt, generator in [ |
187 | | - ('gherkin', generate_file_gherkin), |
188 | | - ('yaml', generate_file_yaml), |
189 | | - ('json', generate_file_json), |
190 | | - ]: |
191 | | - print(f" Generating {fmt} spec...", end=" ", flush=True) |
192 | | - spec = generator(source_path) |
193 | | - formats[fmt] = spec |
194 | | - print(f"✓ ({len(spec)} chars)") |
195 | | - |
196 | | - results = {} |
197 | | - |
198 | | - for fmt, spec in formats.items(): |
199 | | - print(f"\n Testing {fmt}...", end=" ", flush=True) |
200 | | - |
201 | | - if no_llm: |
202 | | - generated = generate_from_template(spec) |
203 | | - results[fmt] = (spec, generated) |
204 | | - print(f"✓ (template, {len(generated)} chars)") |
205 | | - else: |
206 | | - try: |
207 | | - client = get_client() |
208 | | - prompt = f"""Generate Python code from this {fmt} specification: |
209 | | -
|
210 | | -{spec[:4000]} |
211 | | -
|
212 | | -Generate complete, working Python code.""" |
213 | | - |
214 | | - response = client.generate(prompt, max_tokens=4000) |
215 | | - generated = extract_code_block(response) |
216 | | - results[fmt] = (spec, generated) |
217 | | - print(f"✓ ({len(generated)} chars)") |
218 | | - except Exception as e: |
219 | | - print(f"✗ ({e})") |
220 | | - # Fallback to template |
221 | | - generated = generate_from_template(spec) |
222 | | - results[fmt] = (spec, generated) |
223 | | - |
224 | | - # Compare |
225 | | - comparison = compare_formats(original, results) |
226 | | - |
227 | | - # Print comparison |
228 | | - print(f"\n📊 Format Comparison:") |
229 | | - print("-"*60) |
230 | | - print(f"{'Format':<12} {'Overall':>10} {'Grade':>6} {'Text':>10} {'Struct':>10} {'Semantic':>10}") |
231 | | - print("-"*60) |
232 | | - |
233 | | - for fmt, summary in comparison['summary'].items(): |
234 | | - print(f"{fmt:<12} {summary['overall']:>9.1f}% {summary['grade']:>6} " |
235 | | - f"{summary['text']:>9.1f}% {summary['structural']:>9.1f}% {summary['semantic']:>9.1f}%") |
236 | | - |
237 | | - print("-"*60) |
238 | | - print(f"\n🏆 Best Format by Category:") |
239 | | - for category, fmt in comparison['best'].items(): |
240 | | - print(f" {category}: {fmt}") |
241 | | - |
242 | | - return comparison |
243 | | - |
244 | | - |
245 | | -def batch_analyze(project_path: str, no_llm: bool = False): |
246 | | - """Analyze all Python files in a directory.""" |
247 | | - path = Path(project_path) |
248 | | - files = list(path.glob('*.py')) |
249 | | - |
250 | | - print(f"\nBatch Analysis: {project_path}") |
251 | | - print(f"Found {len(files)} Python files") |
252 | | - print("="*60) |
253 | | - |
254 | | - all_results = [] |
255 | | - |
256 | | - for file_path in files[:5]: # Limit to 5 for demo |
257 | | - try: |
258 | | - result = analyze_single(str(file_path), verbose=False, no_llm=no_llm) |
259 | | - all_results.append({ |
260 | | - 'file': file_path.name, |
261 | | - 'score': result.overall_score, |
262 | | - 'grade': result.quality_grade, |
263 | | - }) |
264 | | - except Exception as e: |
265 | | - print(f"Error analyzing {file_path.name}: {e}") |
| 91 | +def _template_generate(spec: str) -> str: |
| 92 | + """Simple template fallback.""" |
| 93 | + import re |
| 94 | + classes = list(set(re.findall(r'class (\w+)', spec)))[:3] |
266 | 95 |
|
267 | | - # Summary |
268 | | - if all_results: |
269 | | - avg_score = sum(r['score'] for r in all_results) / len(all_results) |
270 | | - print(f"\n📈 Batch Summary:") |
271 | | - print(f" Files analyzed: {len(all_results)}") |
272 | | - print(f" Average score: {avg_score:.1f}%") |
273 | | - |
274 | | - print(f"\n By file:") |
275 | | - for r in sorted(all_results, key=lambda x: -x['score']): |
276 | | - print(f" {r['file']}: {r['score']:.1f}% ({r['grade']})") |
| 96 | + code = "from dataclasses import dataclass\nfrom typing import Optional, List\n\n" |
| 97 | + for cls in classes: |
| 98 | + if cls.isidentifier() and cls not in ['Given', 'When', 'Then']: |
| 99 | + code += f"@dataclass\nclass {cls}:\n pass\n\n" |
| 100 | + return code |
277 | 101 |
|
278 | 102 |
|
def main():
    """CLI entry point: parse arguments and run the metrics analysis."""
    parser = argparse.ArgumentParser(description='Reproduction metrics analysis')
    parser.add_argument('source', nargs='?', default='tests/samples/sample_dataclasses.py')
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--no-llm', action='store_true')
    opts = parser.parse_args()

    analyze_file(opts.source, opts.verbose, opts.no_llm)
308 | 111 |
|
309 | 112 |
|
310 | 113 | if __name__ == '__main__': |
|
0 commit comments