Full implementation - Vision AI, config, improved pipeline

Major changes:
- vision_analyzer.py: Real OpenAI/Anthropic vision API integration
  - Component detection with confidence scores
  - Atomizer hints extraction (objectives, constraints, parameters)
  - Material and feature identification
  - Timeline correlation with transcript

- config.py: Full configuration system
  - API settings (provider, keys, models)
  - Processing settings (Whisper model, frame interval, scene detection)
  - Output settings (BOM, hints, PDF template)
  - Config file support (~/.cad-documenter.toml)

- audio_analyzer.py: Enhanced transcription
  - Audio stream detection
  - Graceful fallback for missing audio
  - Keyword extraction
  - Technical term detection
  - Timeline correlation

- video_processor.py: Smart frame extraction
  - Scene change detection via ffmpeg
  - Configurable thresholds
  - Best frame selection

- doc_generator.py: Improved output
  - Better Markdown templates
  - BOM CSV export
  - Atomizer hints JSON
  - Component cards

- cli.py: Rich CLI with progress indicators
  - Config file support
  - --init-config flag
  - Verbose mode
  - Better error messages

- tests: Comprehensive test suite
This commit is contained in:
Mario Lavoie
2026-01-27 20:16:44 +00:00
parent 1e94a98e5b
commit 148180c12e
9 changed files with 2084 additions and 270 deletions

View File

@@ -1,54 +1,288 @@
"""Basic tests for CAD-Documenter pipeline."""
"""Tests for CAD-Documenter pipeline."""
import pytest
from pathlib import Path
# NOTE(review): diff markers and indentation were lost in this view. Judging by
# the hunk header above, this span appears to interleave the removed
# module-level test_imports() (one function importing every module and checking
# the version) with the header of the TestImports class that replaces it.
# Confirm against the repository before editing either side.
def test_imports():
class TestImports:
"""Test that all modules can be imported."""
from cad_documenter import __version__
from cad_documenter.cli import main
from cad_documenter.pipeline import DocumentationPipeline
from cad_documenter.video_processor import VideoProcessor
from cad_documenter.audio_analyzer import AudioAnalyzer
from cad_documenter.vision_analyzer import VisionAnalyzer
from cad_documenter.doc_generator import DocGenerator
assert __version__ == "0.1.0"
def test_version(self):
    """The package reports the expected version string."""
    import cad_documenter

    assert cad_documenter.__version__ == "0.1.0"
def test_cli_import(self):
    """The CLI module exposes a callable ``main`` entry point."""
    from cad_documenter import cli

    entry_point = cli.main
    assert callable(entry_point)
def test_pipeline_import(self):
    """The pipeline module exposes ``DocumentationPipeline``."""
    from cad_documenter import pipeline

    assert pipeline.DocumentationPipeline is not None
def test_video_processor_import(self):
    """The video processor module exposes ``VideoProcessor`` and ``FrameInfo``."""
    from cad_documenter import video_processor

    processor_cls = video_processor.VideoProcessor
    frame_cls = video_processor.FrameInfo
    assert processor_cls is not None
    assert frame_cls is not None
def test_audio_analyzer_import(self):
    """The audio analyzer module exposes its analyzer and transcript types."""
    from cad_documenter.audio_analyzer import AudioAnalyzer, Transcript, TranscriptSegment

    # Every imported name is asserted so none is left as an unused import,
    # matching the other import tests in this class.
    assert AudioAnalyzer is not None
    assert Transcript is not None
    assert TranscriptSegment is not None
def test_vision_analyzer_import(self):
    """The vision analyzer module exposes its analyzer and result types."""
    from cad_documenter.vision_analyzer import (
        VisionAnalyzer, Component, ComponentAnalysis, AtomizerHints
    )

    # Every imported name is asserted so none is left as an unused import,
    # matching the other import tests in this class.
    assert VisionAnalyzer is not None
    assert Component is not None
    assert ComponentAnalysis is not None
    assert AtomizerHints is not None
def test_doc_generator_import(self):
    """The doc generator module exposes ``DocGenerator``."""
    from cad_documenter import doc_generator

    assert doc_generator.DocGenerator is not None
def test_config_import(self):
    """The config module exposes the config classes and ``load_config``."""
    from cad_documenter.config import (
        Config, load_config, APIConfig, ProcessingConfig, OutputConfig
    )

    assert Config is not None
    assert callable(load_config)
    # The section classes are asserted too so no imported name goes unused,
    # matching the other import tests in this class.
    assert APIConfig is not None
    assert ProcessingConfig is not None
    assert OutputConfig is not None
# NOTE(review): diff markers and indentation were lost in this view. This span
# appears to interleave the removed module-level test_transcript_dataclass()
# with the added TestTranscript class header and its test_transcript_creation
# method, which is why `segments` and `transcript` each show up twice.
# Confirm against the repository before editing either side.
def test_transcript_dataclass():
class TestTranscript:
"""Test Transcript dataclass functionality."""
from cad_documenter.audio_analyzer import Transcript, TranscriptSegment
segments = [
TranscriptSegment(start=0.0, end=5.0, text="This is the main bracket"),
TranscriptSegment(start=5.0, end=10.0, text="It holds the motor"),
TranscriptSegment(start=10.0, end=15.0, text="Made of aluminum"),
]
def test_transcript_creation(self):
from cad_documenter.audio_analyzer import Transcript, TranscriptSegment
transcript = Transcript(segments=segments, full_text="This is the main bracket. It holds the motor. Made of aluminum.")
segments = [
TranscriptSegment(start=0.0, end=5.0, text="This is the main bracket"),
TranscriptSegment(start=5.0, end=10.0, text="It holds the motor"),
TranscriptSegment(start=10.0, end=15.0, text="Made of aluminum"),
]
# Test get_text_at
text = transcript.get_text_at(7.0, window=3.0)
assert "holds the motor" in text
assert "main bracket" in text
transcript = Transcript(
segments=segments,
full_text="This is the main bracket. It holds the motor. Made of aluminum."
)
assert len(transcript.segments) == 3
assert "bracket" in transcript.full_text
def test_get_text_at(self):
    """get_text_at() returns text from the segments around a timestamp."""
    from cad_documenter.audio_analyzer import Transcript, TranscriptSegment

    spoken = (
        (0.0, 5.0, "This is the main bracket"),
        (5.0, 10.0, "It holds the motor"),
        (10.0, 15.0, "Made of aluminum"),
    )
    transcript = Transcript(
        segments=[
            TranscriptSegment(start=begin, end=finish, text=words)
            for begin, finish, words in spoken
        ],
        full_text="This is the main bracket. It holds the motor. Made of aluminum.",
    )

    # A query at 7.0 with window=3.0 is expected to pick up text from both
    # the 0-5 s and the 5-10 s segments.
    nearby = transcript.get_text_at(7.0, window=3.0)
    for phrase in ("holds the motor", "main bracket"):
        assert phrase in nearby
def test_get_segment_at(self):
    """get_segment_at() returns the segment covering a given timestamp."""
    from cad_documenter.audio_analyzer import Transcript, TranscriptSegment

    first = TranscriptSegment(start=0.0, end=5.0, text="First segment")
    second = TranscriptSegment(start=5.0, end=10.0, text="Second segment")
    transcript = Transcript(segments=[first, second], full_text="")

    # One probe inside each segment, checked in the same order as before.
    probes = ((3.0, "First segment"), (7.0, "Second segment"))
    for moment, expected_text in probes:
        found = transcript.get_segment_at(moment)
        assert found is not None
        assert found.text == expected_text
def test_search(self):
    """search() finds only the segment containing the query term."""
    from cad_documenter.audio_analyzer import Transcript, TranscriptSegment

    transcript = Transcript(
        segments=[
            TranscriptSegment(start=0.0, end=5.0, text="The bracket is aluminum"),
            TranscriptSegment(start=5.0, end=10.0, text="The motor is steel"),
        ],
        full_text="",
    )

    hits = transcript.search("aluminum")
    assert len(hits) == 1
    # Each hit is indexable; element 0 is the matching segment.
    only_hit = hits[0]
    assert only_hit[0].text == "The bracket is aluminum"
# NOTE(review): diff markers and indentation were lost in this view. This span
# appears to interleave the removed module-level test_component_dataclass()
# with the added TestComponent class header and its test_component_creation
# method, which is why the Component(...) construction and the first two
# assertions each show up twice. Confirm against the repository before editing.
def test_component_dataclass():
class TestComponent:
"""Test Component dataclass."""
from cad_documenter.vision_analyzer import Component
component = Component(
name="Main Bracket",
description="Primary structural member",
function="Holds the motor",
material="Aluminum 6061-T6",
features=["4x M6 holes", "Fillet radii"],
)
def test_component_creation(self):
from cad_documenter.vision_analyzer import Component
assert component.name == "Main Bracket"
assert len(component.features) == 2
component = Component(
name="Main Bracket",
description="Primary structural member",
function="Holds the motor",
material="Aluminum 6061-T6",
features=["4x M6 holes", "Fillet radii"],
)
assert component.name == "Main Bracket"
assert len(component.features) == 2
assert component.material == "Aluminum 6061-T6"
def test_component_defaults(self):
    """A Component built from name and description alone uses empty defaults."""
    from cad_documenter.vision_analyzer import Component

    bare = Component(name="Test", description="Test component")

    expected_defaults = (
        ("function", ""),
        ("material", ""),
        ("features", []),
        ("confidence", 0.0),
    )
    for attribute, default in expected_defaults:
        assert getattr(bare, attribute) == default
# TODO: Add integration tests with sample videos
class TestAtomizerHints:
    """Tests for the AtomizerHints dataclass."""

    def test_hints_creation(self):
        """Values passed to the constructor can be read back from the instance."""
        from cad_documenter.vision_analyzer import AtomizerHints

        init_kwargs = {
            "objectives": [{"name": "mass", "direction": "minimize"}],
            "constraints": [{"type": "envelope", "value": "200mm"}],
            "parameters": ["thickness", "fillet_radius"],
            "critical_regions": [{"feature": "fillet", "concern": "stress_concentration"}],
        }
        hints = AtomizerHints(**init_kwargs)

        assert len(hints.objectives) == 1
        first_objective = hints.objectives[0]
        assert first_objective["name"] == "mass"
        assert "thickness" in hints.parameters
class TestConfig:
    """Tests for configuration defaults and loading."""

    def test_default_config(self):
        """A freshly constructed Config carries the expected default values."""
        from cad_documenter.config import Config

        defaults = Config()
        assert defaults.api.provider == "openai"
        assert defaults.processing.whisper_model == "base"
        assert defaults.output.include_bom is True

    def test_load_config_defaults(self):
        """load_config() is expected to return defaults when the file is absent."""
        from cad_documenter.config import load_config

        # This path is not expected to exist on any test machine.
        missing_file = Path("/nonexistent/config.toml")
        loaded = load_config(missing_file)
        assert loaded.api.provider == "openai"
        assert loaded.processing.frame_interval == 2.0
class TestDocGenerator:
    """Tests for the files DocGenerator writes."""

    def test_generate_creates_file(self, tmp_path):
        """generate() writes a document that names the assembly and its part."""
        from cad_documenter.doc_generator import DocGenerator
        from cad_documenter.vision_analyzer import ComponentAnalysis, Component

        generator = DocGenerator(tmp_path)
        only_part = Component(
            name="Test Part",
            description="A test part",
            material="Steel",
            function="Testing",
        )
        analysis = ComponentAnalysis(
            assembly_name="Test Assembly",
            summary="This is a test assembly.",
            components=[only_part],
        )

        written = generator.generate(analysis)

        assert written.exists()
        rendered = written.read_text()
        for expected in ("Test Assembly", "Test Part"):
            assert expected in rendered

    def test_generate_with_bom(self, tmp_path):
        """generate(bom=True) adds a BOM section and writes bom.csv."""
        from cad_documenter.doc_generator import DocGenerator
        from cad_documenter.vision_analyzer import ComponentAnalysis, Component

        generator = DocGenerator(tmp_path)
        parts = [
            Component(name="Part A", description="First", material="Aluminum"),
            Component(name="Part B", description="Second", material="Steel"),
        ]
        analysis = ComponentAnalysis(
            assembly_name="Test Assembly",
            summary="Test",
            components=parts,
        )

        written = generator.generate(analysis, bom=True)

        rendered = written.read_text()
        for expected in ("Bill of Materials", "Part A", "Part B"):
            assert expected in rendered

        # The CSV export is expected next to the document in the output dir.
        assert (tmp_path / "bom.csv").exists()

    def test_atomizer_hints_json(self, tmp_path):
        """generate_atomizer_hints() writes JSON carrying the assembly's hints."""
        from cad_documenter.doc_generator import DocGenerator
        from cad_documenter.vision_analyzer import ComponentAnalysis, Component, AtomizerHints
        import json

        generator = DocGenerator(tmp_path)
        bracket = Component(name="Bracket", description="Main bracket", material="Aluminum")
        optimization = AtomizerHints(
            objectives=[{"name": "mass", "direction": "minimize"}],
            parameters=["thickness"],
        )
        analysis = ComponentAnalysis(
            assembly_name="Test Assembly",
            summary="Test",
            components=[bracket],
            atomizer_hints=optimization,
        )

        written = generator.generate_atomizer_hints(analysis)

        assert written.exists()
        payload = json.loads(written.read_text())
        assert payload["assembly_name"] == "Test Assembly"
        assert len(payload["optimization_hints"]["objectives"]) == 1
# Integration tests (require actual video files)
class TestIntegration:
    """End-to-end tests; skipped because they need a sample video fixture."""

    @pytest.mark.skip(reason="Requires test video file")
    def test_full_pipeline(self, tmp_path):
        """Run the whole pipeline on the sample video and check the output."""
        from cad_documenter.pipeline import DocumentationPipeline

        sample_video = Path("tests/fixtures/sample.mp4")
        outputs = DocumentationPipeline(sample_video, tmp_path).run_full_pipeline()

        assert outputs["documentation"].exists()