feat: Add Studio UI, intake system, and extractor improvements

Dashboard:
- Add Studio page with drag-drop model upload and Claude chat
- Add intake system for study creation workflow
- Improve session manager and context builder
- Add intake API routes and frontend components

Optimization Engine:
- Add CLI module for command-line operations
- Add intake module for study preprocessing
- Add validation module with gate checks
- Improve Zernike extractor documentation
- Update spec models with better validation
- Enhance solve_simulation robustness

Documentation:
- Add ATOMIZER_STUDIO.md planning doc
- Add ATOMIZER_UX_SYSTEM.md for UX patterns
- Update extractor library docs
- Add study-readme-generator skill

Tools:
- Add test scripts for extraction validation
- Add Zernike recentering test

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-27 12:02:30 -05:00
parent 3193831340
commit a26914bbe8
56 changed files with 14173 additions and 646 deletions
--- a/atomizer.py
+++ b/atomizer.py
@@ -34,26 +34,42 @@ from typing import Optional
 PROJECT_ROOT = Path(__file__).parent
 sys.path.insert(0, str(PROJECT_ROOT))

-from optimization_engine.processors.surrogates.auto_trainer import AutoTrainer, check_training_status
+from optimization_engine.processors.surrogates.auto_trainer import (
+    AutoTrainer,
+    check_training_status,
+)
 from optimization_engine.config.template_loader import (
    create_study_from_template,
    list_templates,
-    get_template
-)
-from optimization_engine.validators.study_validator import (
-    validate_study,
-    list_studies,
-    quick_check
+    get_template,
 )
+from optimization_engine.validators.study_validator import validate_study, list_studies, quick_check
+
+
+# New UX System imports (lazy loaded to avoid import errors)
+def get_intake_processor():
+    """Return the IntakeProcessor class, importing its module only when called."""
+    from optimization_engine.intake import IntakeProcessor
+    return IntakeProcessor
+
+
+def get_validation_gate():
+    """Return the ValidationGate class, importing its module only when called."""
+    from optimization_engine.validation import ValidationGate
+    return ValidationGate
+
+
+def get_report_generator():
+    """Return the HTMLReportGenerator class, importing its module only when called."""
+    from optimization_engine.reporting.html_report import HTMLReportGenerator
+    return HTMLReportGenerator


 def setup_logging(verbose: bool = False) -> None:
    """Configure logging."""
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
-        level=level,
-        format='%(asctime)s [%(levelname)s] %(message)s',
-        datefmt='%H:%M:%S'
+        level=level, format="%(asctime)s [%(levelname)s] %(message)s", datefmt="%H:%M:%S"
    )


@@ -95,7 +111,7 @@ def cmd_neural_optimize(args) -> int:
        study_name=args.study,
        min_points=args.min_points,
        epochs=args.epochs,
-        retrain_threshold=args.retrain_every
+        retrain_threshold=args.retrain_every,
    )

    status = trainer.get_status()
@@ -103,8 +119,8 @@ def cmd_neural_optimize(args) -> int:
    print(f"  Model version: v{status['model_version']}")

    # Determine workflow phase
-    has_trained_model = status['model_version'] > 0
-    current_points = status['total_points']
+    has_trained_model = status["model_version"] > 0
+    current_points = status["total_points"]

    if has_trained_model and current_points >= args.min_points:
        print("\n[3/5] Neural model available - starting neural-accelerated optimization...")
@@ -138,11 +154,7 @@ def _run_exploration_phase(args, trainer: AutoTrainer) -> int:
    # Run FEA optimization
    import subprocess

-    cmd = [
-        sys.executable,
-        str(run_script),
-        "--trials", str(fea_trials)
-    ]
+    cmd = [sys.executable, str(run_script), "--trials", str(fea_trials)]

    if args.resume:
        cmd.append("--resume")
@@ -155,7 +167,7 @@ def _run_exploration_phase(args, trainer: AutoTrainer) -> int:
    elapsed = time.time() - start_time

    print("-" * 60)
-    print(f"FEA optimization completed in {elapsed/60:.1f} minutes")
+    print(f"FEA optimization completed in {elapsed / 60:.1f} minutes")

    # Check if we can now train
    print("\n[5/5] Checking training data...")
@@ -169,7 +181,7 @@ def _run_exploration_phase(args, trainer: AutoTrainer) -> int:
            print("  Training failed - check logs")
    else:
        status = trainer.get_status()
-        remaining = args.min_points - status['total_points']
+        remaining = args.min_points - status["total_points"]
        print(f"  {status['total_points']} points collected")
        print(f"  Need {remaining} more for neural training")

@@ -188,12 +200,7 @@ def _run_neural_phase(args, trainer: AutoTrainer) -> int:
    # Run with neural acceleration
    import subprocess

-    cmd = [
-        sys.executable,
-        str(run_script),
-        "--trials", str(args.trials),
-        "--enable-nn"
-    ]
+    cmd = [sys.executable, str(run_script), "--trials", str(args.trials), "--enable-nn"]

    if args.resume:
        cmd.append("--resume")
@@ -206,7 +213,7 @@ def _run_neural_phase(args, trainer: AutoTrainer) -> int:
    elapsed = time.time() - start_time

    print("-" * 60)
-    print(f"Neural optimization completed in {elapsed/60:.1f} minutes")
+    print(f"Neural optimization completed in {elapsed / 60:.1f} minutes")

    # Check for retraining
    print("\n[5/5] Checking if retraining needed...")
@@ -228,10 +235,7 @@ def cmd_create_study(args) -> int:
    print(f"Creating study '{args.name}' from template '{args.template}'...")

    try:
-        study_path = create_study_from_template(
-            template_name=args.template,
-            study_name=args.name
-        )
+        study_path = create_study_from_template(template_name=args.template, study_name=args.name)
        print(f"\nSuccess! Study created at: {study_path}")
        return 0
    except FileNotFoundError as e:
@@ -290,7 +294,7 @@ def cmd_status(args) -> int:
        print(f"  Model version: v{status['model_version']}")
        print(f"  Should train: {status['should_train']}")

-        if status['latest_model']:
+        if status["latest_model"]:
            print(f"  Latest model: {status['latest_model']}")

    else:
@@ -305,8 +309,8 @@ def cmd_status(args) -> int:

        for study in studies:
            icon = "[OK]" if study["is_ready"] else "[!]"
-            trials_info = f"{study['trials']} trials" if study['trials'] > 0 else "no trials"
-            pareto_info = f", {study['pareto']} Pareto" if study['pareto'] > 0 else ""
+            trials_info = f"{study['trials']} trials" if study["trials"] > 0 else "no trials"
+            pareto_info = f", {study['pareto']} Pareto" if study["pareto"] > 0 else ""
            print(f"  {icon} {study['name']}")
            print(f"      Status: {study['status']} ({trials_info}{pareto_info})")

@@ -317,11 +321,7 @@ def cmd_train(args) -> int:
    """Trigger neural network training."""
    print(f"Training neural model for study: {args.study}")

-    trainer = AutoTrainer(
-        study_name=args.study,
-        min_points=args.min_points,
-        epochs=args.epochs
-    )
+    trainer = AutoTrainer(study_name=args.study, min_points=args.min_points, epochs=args.epochs)

    status = trainer.get_status()
    print(f"\nCurrent status:")
@@ -329,8 +329,10 @@ def cmd_train(args) -> int:
    print(f"  Min threshold: {args.min_points}")

    if args.force or trainer.should_train():
-        if args.force and status['total_points'] < args.min_points:
-            print(f"\nWarning: Force training with {status['total_points']} points (< {args.min_points})")
+        if args.force and status["total_points"] < args.min_points:
+            print(
+                f"\nWarning: Force training with {status['total_points']} points (< {args.min_points})"
+            )

        print("\nStarting training...")
        model_path = trainer.train()
@@ -342,7 +344,7 @@ def cmd_train(args) -> int:
            print("\nTraining failed - check logs")
            return 1
    else:
-        needed = args.min_points - status['total_points']
+        needed = args.min_points - status["total_points"]
        print(f"\nNot enough data for training. Need {needed} more points.")
        print("Use --force to train anyway.")
        return 1
@@ -355,6 +357,269 @@ def cmd_validate(args) -> int:
    return 0 if validation.is_ready_to_run else 1


+# ============================================================================
+# NEW UX SYSTEM COMMANDS
+# ============================================================================
+
+
+def cmd_intake(args) -> int:
+    """Process an intake folder into a study.
+
+    Returns 0 on success, 1 if the folder is missing or processing raises.
+    """
+    IntakeProcessor = get_intake_processor()
+
+    # Determine inbox folder (relative names: studies/_inbox, then studies, else as given)
+    inbox_path = Path(args.folder)
+    if not inbox_path.is_absolute():
+        inbox_dir = PROJECT_ROOT / "studies" / "_inbox"
+        if (inbox_dir / args.folder).exists():
+            inbox_path = inbox_dir / args.folder
+        elif (PROJECT_ROOT / "studies" / args.folder).exists():
+            inbox_path = PROJECT_ROOT / "studies" / args.folder
+
+    if not inbox_path.exists():
+        print(f"Error: Folder not found: {inbox_path}")
+        return 1
+
+    print(f"Processing intake: {inbox_path}")
+    print("=" * 60)
+
+    def progress(message: str, percent: float):
+        bar_width = 30
+        filled = int(bar_width * min(max(percent, 0.0), 1.0))  # clamp fill to the bar
+        bar = "=" * filled + "-" * (bar_width - filled)
+        print(f"\r[{bar}] {percent * 100:5.1f}% {message}", end="", flush=True)
+        if percent >= 1.0:
+            print()
+
+    try:
+        processor = IntakeProcessor(inbox_path, progress_callback=progress)
+        context = processor.process(
+            run_baseline=not args.skip_baseline,
+            copy_files=True,
+            run_introspection=True,
+        )
+
+        print("\n" + "=" * 60)
+        print("INTAKE COMPLETE")
+        print("=" * 60)
+        summary = context.get_context_summary()
+        print(f"\nStudy: {context.study_name}")
+        print(f"Location: {processor.study_dir}")
+        print("\nContext loaded:")
+        print(f"  Model: {'Yes' if summary['has_model'] else 'No'}")
+        print(f"  Introspection: {'Yes' if summary['has_introspection'] else 'No'}")
+        print(f"  Baseline: {'Yes' if summary['has_baseline'] else 'No'}")
+        print(
+            f"  Expressions: {summary['num_expressions']} ({summary['num_dv_candidates']} candidates)"
+        )
+
+        if context.has_baseline:
+            print(f"\nBaseline: {context.get_baseline_summary()}")
+
+        if summary["warnings"]:
+            print("\nWarnings:")
+            for w in summary["warnings"]:
+                print(f"  - {w}")
+
+        print(f"\nNext: atomizer gate {context.study_name}")
+        return 0
+
+    except Exception as e:
+        print(f"\nError: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+
+
+def cmd_gate(args) -> int:
+    """Run validation gate before optimization.
+
+    Prints the validation summary, approves the study when ``--approve`` is set and
+    validation passed, and saves the result. Returns 0 if validation passed, else 1.
+    """
+    ValidationGate = get_validation_gate()
+
+    study_path = Path(args.study)
+    if not study_path.is_absolute():
+        study_path = PROJECT_ROOT / "studies" / args.study
+
+    if not study_path.exists():
+        print(f"Error: Study not found: {study_path}")
+        return 1
+
+    print(f"Validation Gate: {study_path.name}")
+    print("=" * 60)
+
+    def progress(message: str, percent: float):
+        bar_width = 30
+        filled = int(bar_width * min(max(percent, 0.0), 1.0))  # clamp fill to the bar
+        bar = "=" * filled + "-" * (bar_width - filled)
+        print(f"\r[{bar}] {percent * 100:5.1f}% {message}", end="", flush=True)
+        if percent >= 1.0:
+            print()
+
+    try:
+        gate = ValidationGate(study_path, progress_callback=progress)
+        result = gate.validate(
+            run_test_trials=not args.skip_trials,
+            n_test_trials=args.trials,
+        )
+
+        print("\n" + "=" * 60)
+        print("VALIDATION PASSED" if result.passed else "VALIDATION FAILED")
+        print("=" * 60)
+
+        # Show test trials
+        if result.test_trials:
+            n_ok = sum(1 for t in result.test_trials if t.success)
+            print(f"\nTest Trials: {n_ok}/{len(result.test_trials)} passed")
+
+            if result.results_vary:
+                print("Results vary: Yes (mesh updating correctly)")
+            else:
+                print("Results vary: NO - MESH MAY NOT BE UPDATING!")
+
+            # Results table: columns (max 3) come from the first trial that has objectives.
+            keys = next((list(t.objectives)[:3] for t in result.test_trials if t.objectives), [])
+            print(f"\n{'Trial':<8} {'Status':<8} {'Time':<8}", end="")
+            for obj in keys:
+                print(f" {obj[:10]:<12}", end="")
+            print()
+
+            for trial in result.test_trials:
+                status = "OK" if trial.success else "FAIL"
+                print(
+                    f"{trial.trial_number:<8} {status:<8} {trial.solve_time_seconds:<8.1f}", end=""
+                )
+                values = trial.objectives or {}  # tolerate a trial without objectives
+                for obj in keys:
+                    print(f" {values[obj]:<12.4f}" if obj in values else f" {'-':<12}", end="")
+                print()
+
+        # Runtime estimate
+        if result.avg_solve_time:
+            print("\nRuntime Estimate:")
+            print(f"  Avg solve: {result.avg_solve_time:.1f}s")
+            if result.estimated_total_runtime:
+                print(f"  Total: {result.estimated_total_runtime / 3600:.1f}h")
+
+        # Errors
+        if result.errors:
+            print("\nErrors:")
+            for err in result.errors:
+                print(f"  - {err}")
+
+        if result.passed and args.approve:
+            gate.approve()
+            print("\nStudy approved for optimization!")
+        elif result.passed:
+            print(f"\nTo approve: atomizer gate {args.study} --approve")
+
+        gate.save_result(result)
+        return 0 if result.passed else 1
+
+    except Exception as e:
+        print(f"\nError: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+
+
+def cmd_finalize(args) -> int:
+    """Generate final report for a study.
+
+    Returns 0 on success, 1 if the study is missing or report generation raises.
+    """
+    HTMLReportGenerator = get_report_generator()
+
+    study_path = Path(args.study)
+    if not study_path.is_absolute():
+        study_path = PROJECT_ROOT / "studies" / args.study
+    if not study_path.exists():
+        print(f"Error: Study not found: {study_path}")
+        return 1
+
+    print(f"Generating report for: {study_path.name}")
+    print("=" * 60)
+
+    try:
+        generator = HTMLReportGenerator(study_path)
+        report_path = generator.generate(include_pdf=getattr(args, "pdf", False))
+        report_url = Path(report_path).resolve().as_uri()  # as_uri() requires an absolute path
+
+        print("\nReport generated successfully!")
+        print(f"  HTML: {report_path}")
+        print(f"  Data: {report_path.parent / 'data'}")
+
+        if getattr(args, "open", False):
+            import webbrowser
+            webbrowser.open(report_url)
+        else:
+            print(f"\nOpen in browser: {report_url}")
+        return 0
+
+    except Exception as e:
+        print(f"\nError: {e}")
+        if args.verbose:
+            import traceback
+            traceback.print_exc()
+        return 1
+
+
+def cmd_list_studies(args) -> int:
+    """List all studies and inbox items.
+
+    Prints the folders waiting in ``studies/_inbox`` and then each study folder,
+    tagged with the marker files found in it. A missing ``studies`` folder yields
+    an empty listing. Always returns 0.
+    """
+    studies_dir = PROJECT_ROOT / "studies"
+
+    print("Atomizer Studies")
+    print("=" * 60)
+
+    # Inbox items
+    inbox_dir = studies_dir / "_inbox"
+    if inbox_dir.exists():
+        inbox_items = [d for d in inbox_dir.iterdir() if d.is_dir() and not d.name.startswith(".")]
+        if inbox_items:
+            print("\nPending Intake (_inbox/):")
+            for item in sorted(inbox_items):
+                checks = {
+                    "yaml": (item / "intake.yaml").exists(),
+                    "model": any(item.rglob("*.sim")),  # stops at the first match
+                }
+                status = [label for label, found in checks.items() if found]
+                print(f"  {item.name:<30} [{', '.join(status) or 'empty'}]")
+
+    # Active studies
+    print("\nStudies:")
+    # studies/ may not exist yet; iterdir() on a missing folder raises FileNotFoundError.
+    study_dirs = sorted(studies_dir.iterdir()) if studies_dir.is_dir() else []
+    for study_dir in study_dirs:
+        # Skip plain files plus private ("_inbox") and hidden entries.
+        if not study_dir.is_dir() or study_dir.name.startswith(("_", ".")):
+            continue
+
+        has_spec = (study_dir / "atomizer_spec.json").exists() or (
+            study_dir / "optimization_config.json"
+        ).exists()
+        # Dict order fixes the tag order: configured, approved, has_data.
+        checks = {
+            "configured": has_spec,
+            "approved": (study_dir / ".validation_approved").exists(),
+            "has_data": any(study_dir.rglob("study.db")),
+        }
+        status = [label for label, found in checks.items() if found]
+        print(f"  {study_dir.name:<30} [{', '.join(status) or 'new'}]")
+
+    return 0
+
+
 def main():
    parser = argparse.ArgumentParser(
        description="Atomizer - Neural-Accelerated Structural Optimization",
@@ -372,7 +637,7 @@ Examples:

  # Manual training
  python atomizer.py train --study my_study --epochs 100
-        """
+        """,
    )

    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
@@ -381,13 +646,14 @@ Examples:

    # neural-optimize command
    neural_parser = subparsers.add_parser(
-        "neural-optimize",
-        help="Run neural-accelerated optimization (main workflow)"
+        "neural-optimize", help="Run neural-accelerated optimization (main workflow)"
    )
    neural_parser.add_argument("--study", "-s", required=True, help="Study name")
    neural_parser.add_argument("--trials", "-n", type=int, default=500, help="Total trials")
    neural_parser.add_argument("--min-points", type=int, default=50, help="Min points for training")
-    neural_parser.add_argument("--retrain-every", type=int, default=50, help="Retrain after N new points")
+    neural_parser.add_argument(
+        "--retrain-every", type=int, default=50, help="Retrain after N new points"
+    )
    neural_parser.add_argument("--epochs", type=int, default=100, help="Training epochs")
    neural_parser.add_argument("--resume", action="store_true", help="Resume existing study")

@@ -414,6 +680,31 @@ Examples:
    validate_parser = subparsers.add_parser("validate", help="Validate study setup")
    validate_parser.add_argument("--study", "-s", required=True, help="Study name")

+    # ========================================================================
+    # NEW UX SYSTEM COMMANDS
+    # ========================================================================
+
+    # intake command
+    intake_parser = subparsers.add_parser("intake", help="Process an intake folder into a study")
+    intake_parser.add_argument("folder", help="Path to intake folder")
+    intake_parser.add_argument("--skip-baseline", action="store_true", help="Skip baseline solve")
+
+    # gate command (validation gate)
+    gate_parser = subparsers.add_parser("gate", help="Run validation gate with test trials")
+    gate_parser.add_argument("study", help="Study name or path")
+    gate_parser.add_argument("--skip-trials", action="store_true", help="Skip test trials")
+    gate_parser.add_argument("--trials", type=int, default=3, help="Number of test trials")
+    gate_parser.add_argument("--approve", action="store_true", help="Approve if validation passes")
+
+    # list command
+    list_studies_parser = subparsers.add_parser("list", help="List all studies and inbox items")
+
+    # finalize command
+    finalize_parser = subparsers.add_parser("finalize", help="Generate final HTML report")
+    finalize_parser.add_argument("study", help="Study name or path")
+    finalize_parser.add_argument("--pdf", action="store_true", help="Also generate PDF")
+    finalize_parser.add_argument("--open", action="store_true", help="Open report in browser")
+
    args = parser.parse_args()

    if not args.command:
@@ -429,7 +720,12 @@ Examples:
        "list-templates": cmd_list_templates,
        "status": cmd_status,
        "train": cmd_train,
-        "validate": cmd_validate
+        "validate": cmd_validate,
+        # New UX commands
+        "intake": cmd_intake,
+        "gate": cmd_gate,
+        "list": cmd_list_studies,
+        "finalize": cmd_finalize,
    }

    handler = commands.get(args.command)