Files
Atomizer/optimization_engine/extractors/extractor_library.py
Anto01 eabcc4c3ca refactor: Major reorganization of optimization_engine module structure
BREAKING CHANGE: Module paths have been reorganized for better maintainability.
Backwards compatibility aliases with deprecation warnings are provided.

New Structure:
- core/           - Optimization runners (runner, intelligent_optimizer, etc.)
- processors/     - Data processing
  - surrogates/   - Neural network surrogates
- nx/             - NX/Nastran integration (solver, updater, session_manager)
- study/          - Study management (creator, wizard, state, reset)
- reporting/      - Reports and analysis (visualizer, report_generator)
- config/         - Configuration management (manager, builder)
- utils/          - Utilities (logger, auto_doc, etc.)
- future/         - Research/experimental code

Migration:
- ~200 import changes across 125 files
- All __init__.py files use lazy loading to avoid circular imports
- Backwards compatibility layer supports old import paths with warnings
- All existing functionality preserved

To migrate existing code:
  OLD: from optimization_engine.nx_solver import NXSolver
  NEW: from optimization_engine.nx.solver import NXSolver

  OLD: from optimization_engine.runner import OptimizationRunner
  NEW: from optimization_engine.core.runner import OptimizationRunner

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 12:30:59 -05:00

234 lines
8.0 KiB
Python

"""
Extractor Library Manager - Phase 3.2 Architecture Refactor
Manages a centralized library of reusable extractors to prevent code duplication
and keep study folders clean.
Architecture Principles:
1. Reusable extractors stored in optimization_engine/extractors/
2. Study folders only contain metadata (which extractors were used)
3. First-time generation adds to library with documentation
4. Subsequent requests reuse existing library code
Author: Antoine Letarte
Date: 2025-11-17
Phase: 3.2 Architecture Refactor
"""
import json
import hashlib
from pathlib import Path
from typing import Dict, Any, List, Optional
import logging
logger = logging.getLogger(__name__)
class ExtractorLibrary:
    """
    Centralized library of reusable FEA result extractors.

    Prevents code duplication by maintaining a core library of extractors
    that can be reused across all optimization studies.

    Every extractor is stored as a Python module inside ``library_dir`` and
    is indexed in ``catalog.json`` by a signature computed from its action,
    domain and params (see ``_compute_signature``).
    """

    def __init__(self, library_dir: Optional[Path] = None):
        """
        Initialize extractor library.

        Creates the library directory and an ``__init__.py`` inside it when
        they are missing, then loads the catalog from ``catalog.json``.

        Args:
            library_dir: Directory for core extractor library. When omitted,
                an ``extractors`` directory next to this module is used.
        """
        if library_dir is None:
            # NOTE(review): if this module itself lives inside
            # optimization_engine/extractors/, this default resolves to a
            # nested extractors/extractors/ directory - confirm intended.
            library_dir = Path(__file__).parent / "extractors"

        self.library_dir = Path(library_dir)
        self.library_dir.mkdir(parents=True, exist_ok=True)

        # Create __init__.py for Python package
        init_file = self.library_dir / "__init__.py"
        if not init_file.exists():
            init_file.write_text(
                '"""Core extractor library for Atomizer."""\n',
                encoding="utf-8",
            )

        # Library catalog - tracks all available extractors
        self.catalog_file = self.library_dir / "catalog.json"
        self.catalog = self._load_catalog()

        logger.info("Extractor library initialized: %s", self.library_dir)
        logger.info("Library contains %d extractors", len(self.catalog))

    def _load_catalog(self) -> Dict[str, Any]:
        """Load extractor catalog from disk (empty dict if no catalog file)."""
        if not self.catalog_file.exists():
            return {}
        with open(self.catalog_file, encoding="utf-8") as f:
            return json.load(f)

    def _save_catalog(self):
        """Save extractor catalog to disk as indented JSON."""
        with open(self.catalog_file, 'w', encoding="utf-8") as f:
            json.dump(self.catalog, f, indent=2)

    def _compute_signature(self, llm_feature: Dict[str, Any]) -> str:
        """
        Compute unique signature for an extractor based on its functionality.

        Two extractors are considered identical if they have the same:
        - Action (e.g., extract_displacement)
        - Domain (e.g., result_extraction)
        - Key parameters (e.g., result_type, metric)

        Returns:
            First 16 hex characters of the SHA-256 of the normalized spec.
        """
        # Normalize the feature specification
        signature_data = {
            'action': llm_feature.get('action', ''),
            'domain': llm_feature.get('domain', ''),
            'params': llm_feature.get('params', {}),
        }
        # sort_keys makes the serialization (and thus the hash) deterministic
        signature_str = json.dumps(signature_data, sort_keys=True)
        return hashlib.sha256(signature_str.encode()).hexdigest()[:16]

    def _resolve_filename(self, action: Any, signature: str) -> str:
        """
        Pick the module filename for the extractor identified by ``signature``.

        The plain ``<action>.py`` name is used whenever it is free. If that
        name is already claimed by a different signature in the catalog, or a
        file of that name already exists on disk, ``<action>_<signature>.py``
        is used instead so that existing code is never overwritten.
        """
        # A catalog entry whose file went missing is rewritten in place.
        recorded = self.catalog.get(signature, {}).get('filename')
        if recorded:
            return recorded

        # Keep only identifier-like characters so the action can neither
        # escape the library directory nor produce an odd module name.
        safe = "".join(
            ch if (ch.isalnum() or ch == "_") else "_" for ch in str(action)
        )
        candidate = f"{safe}.py"

        claimed = any(
            info.get('filename') == candidate
            for other_sig, info in self.catalog.items()
            if other_sig != signature
        )
        if claimed or (self.library_dir / candidate).exists():
            return f"{safe}_{signature}.py"
        return candidate

    def get_or_create(self, llm_feature: Dict[str, Any], extractor_code: str) -> Path:
        """
        Get existing extractor from library or add new one.

        Args:
            llm_feature: LLM feature specification (action, domain, params)
            extractor_code: Generated Python code for the extractor; only
                written when no usable library entry exists yet.

        Returns:
            Path to extractor module in core library
        """
        # Compute signature to check if extractor already exists
        signature = self._compute_signature(llm_feature)

        # Reuse the library copy when both catalog entry and file are present
        known = self.catalog.get(signature)
        if known is not None:
            extractor_file = self.library_dir / known['filename']
            if extractor_file.exists():
                logger.info("Reusing existing extractor: %s", known['name'])
                return extractor_file

        # Create new extractor in library
        action = llm_feature.get('action') or 'unknown_action'
        filename = self._resolve_filename(action, signature)
        extractor_file = self.library_dir / filename

        # Python source is expected to be UTF-8; do not depend on the locale
        extractor_file.write_text(extractor_code, encoding="utf-8")

        # Add to catalog
        self.catalog[signature] = {
            'name': action,
            'filename': filename,
            'action': llm_feature.get('action'),
            'domain': llm_feature.get('domain'),
            'description': llm_feature.get('description', ''),
            'params': llm_feature.get('params', {}),
            'signature': signature,
        }
        self._save_catalog()

        logger.info("Added new extractor to library: %s", action)
        return extractor_file

    def get_extractor_metadata(self, signature: str) -> Optional[Dict[str, Any]]:
        """Get metadata for an extractor by its signature (None if unknown)."""
        return self.catalog.get(signature)

    def list_extractors(self) -> List[Dict[str, Any]]:
        """List the catalog metadata of all extractors in the library."""
        return list(self.catalog.values())

    def get_library_summary(self) -> str:
        """Generate human-readable summary of library contents."""
        rule = "=" * 80
        lines = [
            rule,
            "ATOMIZER EXTRACTOR LIBRARY",
            rule,
            "",
            f"Location: {self.library_dir}",
            f"Total extractors: {len(self.catalog)}",
            "",
        ]

        if self.catalog:
            lines.append("Available Extractors:")
            lines.append("-" * 80)
            for signature, info in self.catalog.items():
                lines.append(f"\n{info['name']}")
                lines.append(f" Domain: {info['domain']}")
                lines.append(f" Description: {info['description']}")
                lines.append(f" File: {info['filename']}")
                lines.append(f" Signature: {signature}")
        else:
            lines.append("Library is empty. Extractors will be added on first use.")

        lines.append("")
        lines.append(rule)
        return "\n".join(lines)
def create_study_manifest(extractors_used: List[str], output_dir: Path) -> None:
    """
    Create a manifest file documenting which extractors were used in a study.

    This replaces the old approach of copying extractor code into study folders.
    Now we just record which library extractors were used.

    The manifest is written to ``extractors_manifest.json`` inside
    ``output_dir``; the directory is created when it does not exist yet.

    Args:
        extractors_used: List of extractor signatures used in this study
        output_dir: Study output directory (a ``Path`` or a path string)
    """
    manifest = {
        'extractors_used': extractors_used,
        'extractor_library': 'optimization_engine/extractors/',
        'note': 'Extractors are stored in the core library, not in this study folder',
    }

    # Accept plain strings too, and make sure the target directory exists so
    # a fresh study folder does not fail with FileNotFoundError.
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    manifest_file = target_dir / "extractors_manifest.json"
    with open(manifest_file, 'w', encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)

    logger.info("Study manifest created: %s", manifest_file)
if __name__ == '__main__':
    # Self-test of the extractor library system.
    # It runs against a throwaway directory so the stub extractor written
    # below can never overwrite a real module in the production library.
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Initialize library
        library = ExtractorLibrary(Path(scratch_dir) / "extractors")

        # Print summary
        print(library.get_library_summary())

        # Test adding an extractor
        test_feature = {
            'action': 'extract_displacement',
            'domain': 'result_extraction',
            'description': 'Extract displacement from OP2 file',
            'params': {'result_type': 'displacement', 'metric': 'max'},
        }
        # Sample module body; the function body is indented so the written
        # file is valid Python.
        test_code = (
            '"""Extract displacement from OP2 file."""\n'
            'def extract_displacement(op2_file):\n'
            '    # Implementation here\n'
            '    pass\n'
        )

        extractor_path = library.get_or_create(test_feature, test_code)
        print(f"\nExtractor created/retrieved: {extractor_path}")

        # Try to add it again - should reuse existing
        extractor_path2 = library.get_or_create(test_feature, test_code)
        print(f"Second call (should reuse): {extractor_path2}")

        # Verify they're the same (explicit raise: asserts vanish under -O)
        if extractor_path != extractor_path2:
            raise AssertionError("Should reuse existing extractor!")

        print("\n[SUCCESS] Extractor deduplication working correctly!")