Full implementation - Vision AI, config, improved pipeline

Major changes:
- vision_analyzer.py: Real OpenAI/Anthropic vision API integration
  - Component detection with confidence scores
  - Atomizer hints extraction (objectives, constraints, parameters)
  - Material and feature identification
  - Timeline correlation with transcript

- config.py: Full configuration system
  - API settings (provider, keys, models)
  - Processing settings (Whisper model, frame interval, scene detection)
  - Output settings (BOM, hints, PDF template)
  - Config file support (~/.cad-documenter.toml)

- audio_analyzer.py: Enhanced transcription
  - Audio stream detection
  - Graceful fallback for missing audio
  - Keyword extraction
  - Technical term detection
  - Timeline correlation

- video_processor.py: Smart frame extraction
  - Scene change detection via ffmpeg
  - Configurable thresholds
  - Best frame selection

- doc_generator.py: Improved output
  - Better Markdown templates
  - BOM CSV export
  - Atomizer hints JSON
  - Component cards

- cli.py: Rich CLI with progress indicators
  - Config file support
  - --init-config flag
  - Verbose mode
  - Better error messages

- tests: Comprehensive test suite
This commit is contained in:
Mario Lavoie
2026-01-27 20:16:44 +00:00
parent 1e94a98e5b
commit 148180c12e
9 changed files with 2084 additions and 270 deletions

View File

@@ -1,14 +1,28 @@
"""CAD-Documenter CLI - Main entry point."""
import click
import sys
from pathlib import Path
from rich.console import Console
import click
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.panel import Panel
from .config import load_config, create_default_config
from .pipeline import DocumentationPipeline
console = Console()
def print_banner():
    """Show the CAD-Documenter welcome banner on the module-level console.

    The banner is a blue-bordered panel holding two lines of Rich markup:
    the tool name with its version string, and a dim tagline beneath it.
    """
    title_line = "[bold blue]CAD-Documenter[/bold blue] v0.1.0\n"
    tagline = "[dim]Video walkthrough → Engineering documentation[/dim]"
    banner = Panel.fit(title_line + tagline, border_style="blue")
    console.print(banner)
@click.command()
@click.argument("video", type=click.Path(exists=True, path_type=Path))
@click.option("-o", "--output", type=click.Path(path_type=Path), help="Output directory")
@@ -16,8 +30,12 @@ console = Console()
@click.option("--atomizer-hints", is_flag=True, help="Generate Atomizer FEA hints")
@click.option("--bom", is_flag=True, help="Generate Bill of Materials")
@click.option("--pdf", is_flag=True, help="Generate PDF via Atomaste Report Standard")
@click.option("--frame-interval", default=2.0, help="Seconds between frame extractions")
@click.option("--whisper-model", default="base", help="Whisper model size (tiny/base/small/medium/large)")
@click.option("--frame-interval", type=float, help="Seconds between frame extractions")
@click.option("--whisper-model", type=click.Choice(["tiny", "base", "small", "medium", "large"]), help="Whisper model size")
@click.option("--api-provider", type=click.Choice(["openai", "anthropic"]), help="Vision API provider")
@click.option("--config", "config_path", type=click.Path(exists=True, path_type=Path), help="Config file path")
@click.option("--init-config", is_flag=True, help="Create default config file and exit")
@click.option("-v", "--verbose", is_flag=True, help="Verbose output")
@click.version_option()
def main(
video: Path,
@@ -26,60 +44,169 @@ def main(
atomizer_hints: bool,
bom: bool,
pdf: bool,
frame_interval: float,
whisper_model: str,
frame_interval: float | None,
whisper_model: str | None,
api_provider: str | None,
config_path: Path | None,
init_config: bool,
verbose: bool,
):
"""
Generate engineering documentation from a CAD walkthrough video.
VIDEO: Path to the video file (.mp4, .mov, .avi, etc.)
"""
console.print(f"[bold blue]CAD-Documenter[/bold blue] v0.1.0")
console.print(f"Processing: [cyan]{video}[/cyan]")
Examples:
cad-doc walkthrough.mp4
cad-doc video.mp4 --output ./docs --bom --atomizer-hints
cad-doc video.mp4 --pdf --whisper-model medium
"""
print_banner()
# Handle --init-config
if init_config:
default_path = Path.home() / ".cad-documenter.toml"
create_default_config(default_path)
console.print(f"[green]✓[/green] Created config file: {default_path}")
console.print("[dim]Edit this file to configure API keys and defaults.[/dim]")
return
# Load configuration
config = load_config(config_path)
# Override config with CLI options
if frame_interval is not None:
config.processing.frame_interval = frame_interval
if whisper_model is not None:
config.processing.whisper_model = whisper_model
if api_provider is not None:
config.api.provider = api_provider
# Check API key
if not frames_only and not config.api.api_key:
provider = config.api.provider.upper()
console.print(f"[red]Error:[/red] No API key found for {config.api.provider}.")
console.print(f"Set [cyan]{provider}_API_KEY[/cyan] environment variable or add to config file.")
console.print(f"\nTo create a config file: [cyan]cad-doc --init-config[/cyan]")
sys.exit(1)
console.print(f"Processing: [cyan]{video.name}[/cyan]")
if verbose:
console.print(f" API: {config.api.provider} ({config.api.vision_model or 'default'})")
console.print(f" Whisper: {config.processing.whisper_model}")
# Default output directory
if output is None:
output = video.parent / f"{video.stem}_docs"
output.mkdir(parents=True, exist_ok=True)
# Run pipeline
pipeline = DocumentationPipeline(
video_path=video,
output_dir=output,
frame_interval=frame_interval,
whisper_model=whisper_model,
)
console.print(f"Output: [cyan]{output}[/cyan]")
# Initialize pipeline
try:
pipeline = DocumentationPipeline(
video_path=video,
output_dir=output,
config=config,
)
except ValueError as e:
console.print(f"[red]Configuration error:[/red] {e}")
sys.exit(1)
# Frames only mode
if frames_only:
console.print("[yellow]Extracting frames only...[/yellow]")
pipeline.extract_frames()
console.print(f"[green]✓[/green] Frames saved to {output / 'frames'}")
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
console=console,
) as progress:
progress.add_task("Extracting frames...", total=None)
frames = pipeline.extract_frames()
console.print(f"[green]✓[/green] Extracted {len(frames)} frames to {output / 'frames'}")
return
# Full pipeline
console.print("[yellow]Step 1/4:[/yellow] Extracting frames...")
frames = pipeline.extract_frames()
console.print(f" [green]✓[/green] Extracted {len(frames)} frames")
console.print("[yellow]Step 2/4:[/yellow] Transcribing audio...")
transcript = pipeline.transcribe_audio()
console.print(f" [green]✓[/green] Transcribed {len(transcript.segments)} segments")
console.print("[yellow]Step 3/4:[/yellow] Analyzing components...")
analysis = pipeline.analyze_components(frames, transcript)
console.print(f" [green]✓[/green] Identified {len(analysis.components)} components")
console.print("[yellow]Step 4/4:[/yellow] Generating documentation...")
doc_path = pipeline.generate_documentation(analysis, atomizer_hints=atomizer_hints, bom=bom)
console.print(f" [green]✓[/green] Documentation saved to {doc_path}")
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
console=console,
) as progress:
# Step 1: Extract frames
task1 = progress.add_task("[cyan]Step 1/4:[/cyan] Extracting frames...", total=None)
frames = pipeline.extract_frames()
progress.update(task1, description=f"[green]✓[/green] Extracted {len(frames)} frames")
progress.remove_task(task1)
# Step 2: Transcribe
task2 = progress.add_task("[cyan]Step 2/4:[/cyan] Transcribing audio...", total=None)
transcript = pipeline.transcribe_audio()
seg_count = len(transcript.segments) if transcript.segments else 0
progress.update(task2, description=f"[green]✓[/green] Transcribed {seg_count} segments")
progress.remove_task(task2)
if verbose and transcript.full_text:
console.print(Panel(
transcript.full_text[:500] + ("..." if len(transcript.full_text) > 500 else ""),
title="Transcript Preview",
border_style="dim"
))
# Step 3: Analyze
task3 = progress.add_task("[cyan]Step 3/4:[/cyan] Analyzing components...", total=None)
analysis = pipeline.analyze_components(frames, transcript)
comp_count = len(analysis.components)
progress.update(task3, description=f"[green]✓[/green] Identified {comp_count} components")
progress.remove_task(task3)
if verbose and analysis.components:
console.print("\n[bold]Components found:[/bold]")
for c in analysis.components:
console.print(f"{c.name} ({c.material or 'material unknown'})")
# Step 4: Generate documentation
task4 = progress.add_task("[cyan]Step 4/4:[/cyan] Generating documentation...", total=None)
doc_path = pipeline.generate_documentation(
analysis,
atomizer_hints=atomizer_hints or config.output.include_atomizer_hints,
bom=bom or config.output.include_bom,
)
progress.update(task4, description=f"[green]✓[/green] Documentation generated")
progress.remove_task(task4)
# Generate PDF if requested
if pdf:
console.print("[yellow]Generating PDF...[/yellow]")
pdf_path = pipeline.generate_pdf(doc_path)
console.print(f" [green]✓[/green] PDF saved to {pdf_path}")
console.print(f"\n[bold green]Done![/bold green] Output: {output}")
console.print("[cyan]Generating PDF...[/cyan]")
try:
pdf_path = pipeline.generate_pdf(doc_path)
console.print(f"[green]✓[/green] PDF: {pdf_path}")
except Exception as e:
console.print(f"[yellow]Warning:[/yellow] PDF generation failed: {e}")
# Summary
console.print()
console.print(Panel.fit(
f"[bold green]Documentation complete![/bold green]\n\n"
f"📄 [cyan]{doc_path}[/cyan]\n"
f"📊 {len(analysis.components)} components documented\n"
f"🖼️ {len(frames)} frames extracted",
title="Summary",
border_style="green"
))
# Show atomizer hints summary if generated
if (atomizer_hints or config.output.include_atomizer_hints) and analysis.atomizer_hints:
hints = analysis.atomizer_hints
if hints.objectives or hints.constraints:
console.print("\n[bold]Atomizer Hints:[/bold]")
for obj in hints.objectives[:3]:
console.print(f" 🎯 {obj['direction'].capitalize()} {obj['name']}")
for constraint in hints.constraints[:3]:
console.print(f" 📏 {constraint['type']}: {constraint['value']}")
if __name__ == "__main__":