Add comprehensive documentation and examples

- docs/USAGE.md: Full usage guide with CLI options, Python API, troubleshooting
- docs/ATOMIZER_INTEGRATION.md: Guide for FEA/Atomizer integration
- examples/sample_config.toml: Annotated configuration example
- README.md: Expanded with installation, usage, architecture
This commit is contained in:
Mario Lavoie
2026-01-27 20:18:28 +00:00
parent 148180c12e
commit ca51b10c45
7 changed files with 1010 additions and 191 deletions

View File

@@ -1,3 +1,34 @@
"""CAD-Documenter: Video walkthrough → Complete engineering documentation."""
"""CAD-Documenter: Video walkthrough → Engineering documentation."""
__version__ = "0.1.0"
__version__ = "0.2.0"
from .pipeline import DocumentationPipeline, create_pipeline, PipelineResult
from .config import Config, load_config
from .video_processor import VideoProcessor, FrameInfo, VideoMetadata
from .audio_analyzer import AudioAnalyzer, Transcript, TranscriptSegment
from .vision_analyzer import VisionAnalyzer, Component, ComponentAnalysis
from .doc_generator import DocGenerator
# Public API of the package: the names exposed by a star-import of this
# module. Every entry corresponds to a name imported above, grouped by the
# submodule it comes from.
__all__ = [
    # Main entry points
    "DocumentationPipeline",
    "create_pipeline",
    "PipelineResult",
    # Configuration
    "Config",
    "load_config",
    # Video processing
    "VideoProcessor",
    "FrameInfo",
    "VideoMetadata",
    # Audio processing
    "AudioAnalyzer",
    "Transcript",
    "TranscriptSegment",
    # Vision analysis
    "VisionAnalyzer",
    "Component",
    "ComponentAnalysis",
    # Documentation
    "DocGenerator",
]

View File

@@ -5,208 +5,379 @@ from pathlib import Path
import click
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn
from rich.panel import Panel
from rich.table import Table
from .config import load_config, create_default_config
from .pipeline import DocumentationPipeline
from .pipeline import DocumentationPipeline, PipelineProgress, PipelineStage, create_pipeline
from .config import Config, load_config
console = Console()
def print_banner():
"""Print welcome banner."""
"""Print the CAD-Documenter banner."""
console.print(Panel.fit(
"[bold blue]CAD-Documenter[/bold blue] v0.1.0\n"
"[bold blue]CAD-Documenter[/bold blue] v0.2.0\n"
"[dim]Video walkthrough → Engineering documentation[/dim]",
border_style="blue"
))
@click.command()
def progress_handler(progress: PipelineProgress):
    """Echo a single pipeline progress update to the console.

    Updates whose ``error`` attribute is truthy are printed with a red cross.
    Every other update is prefixed with an icon looked up from its stage;
    stages without an entry get an empty prefix.
    """
    icon = {
        PipelineStage.INIT: "🔧",
        PipelineStage.FRAMES: "🎬",
        PipelineStage.TRANSCRIPTION: "🎤",
        PipelineStage.ANALYSIS: "🔍",
        PipelineStage.DOCUMENTATION: "📝",
        PipelineStage.PDF: "📄",
        PipelineStage.COMPLETE: "",
    }.get(progress.stage, "")

    if not progress.error:
        console.print(f" {icon} {progress.message}")
        return

    console.print(f" [red]✗[/red] {progress.message}")
@click.group(invoke_without_command=True)
@click.pass_context
def cli(ctx):
    """CAD-Documenter: Generate engineering documentation from video walkthroughs."""
    # When a subcommand was given, click dispatches to it after this
    # callback returns, so there is nothing to do here.
    if ctx.invoked_subcommand is not None:
        return

    # Bare invocation: show the group help.
    click.echo(ctx.get_help())
@cli.command()
@click.argument("video", type=click.Path(exists=True, path_type=Path))
@click.option("-o", "--output", type=click.Path(path_type=Path), help="Output directory")
@click.option("--frames-only", is_flag=True, help="Only extract frames, skip documentation")
@click.option("--skip-transcription", is_flag=True, help="Skip audio transcription")
@click.option("--atomizer-hints", is_flag=True, help="Generate Atomizer FEA hints")
@click.option("--bom", is_flag=True, help="Generate Bill of Materials")
@click.option("--pdf", is_flag=True, help="Generate PDF via Atomaste Report Standard")
@click.option("--frame-interval", type=float, help="Seconds between frame extractions")
@click.option("--whisper-model", type=click.Choice(["tiny", "base", "small", "medium", "large"]), help="Whisper model size")
@click.option("--api-provider", type=click.Choice(["openai", "anthropic"]), help="Vision API provider")
@click.option("--config", "config_path", type=click.Path(exists=True, path_type=Path), help="Config file path")
@click.option("--init-config", is_flag=True, help="Create default config file and exit")
@click.option("-v", "--verbose", is_flag=True, help="Verbose output")
@click.version_option()
def main(
@click.option("--frame-mode", type=click.Choice(["interval", "scene", "hybrid"]),
default="hybrid", help="Frame extraction mode")
@click.option("--frame-interval", default=2.0, help="Seconds between frames (interval mode)")
@click.option("--whisper-model", default="base",
help="Whisper model size (tiny/base/small/medium/large)")
@click.option("--vision-provider", type=click.Choice(["anthropic", "openai"]),
default="anthropic", help="Vision API provider")
@click.option("--vision-model", default=None, help="Vision model name (provider-specific)")
@click.option("--config", type=click.Path(path_type=Path), help="Config file path")
@click.option("--verbose", "-v", is_flag=True, help="Verbose output")
def process(
video: Path,
output: Path | None,
frames_only: bool,
skip_transcription: bool,
atomizer_hints: bool,
bom: bool,
pdf: bool,
frame_interval: float | None,
whisper_model: str | None,
api_provider: str | None,
config_path: Path | None,
init_config: bool,
frame_mode: str,
frame_interval: float,
whisper_model: str,
vision_provider: str,
vision_model: str | None,
config: Path | None,
verbose: bool,
):
"""
Generate engineering documentation from a CAD walkthrough video.
Process a video walkthrough and generate documentation.
VIDEO: Path to the video file (.mp4, .mov, .avi, etc.)
Examples:
cad-doc walkthrough.mp4
cad-doc video.mp4 --output ./docs --bom --atomizer-hints
cad-doc video.mp4 --pdf --whisper-model medium
cad-doc process video.mp4
cad-doc process video.mp4 --atomizer-hints --bom --pdf
cad-doc process video.mp4 --frame-mode scene --vision-provider openai
"""
print_banner()
# Handle --init-config
if init_config:
default_path = Path.home() / ".cad-documenter.toml"
create_default_config(default_path)
console.print(f"[green]✓[/green] Created config file: {default_path}")
console.print("[dim]Edit this file to configure API keys and defaults.[/dim]")
return
# Load configuration
config = load_config(config_path)
console.print(f"\n📹 Processing: [cyan]{video}[/cyan]")
# Load or create config
cfg = load_config(config)
# Override config with CLI options
if frame_interval is not None:
config.processing.frame_interval = frame_interval
if whisper_model is not None:
config.processing.whisper_model = whisper_model
if api_provider is not None:
config.api.provider = api_provider
# Check API key
if not frames_only and not config.api.api_key:
provider = config.api.provider.upper()
console.print(f"[red]Error:[/red] No API key found for {config.api.provider}.")
console.print(f"Set [cyan]{provider}_API_KEY[/cyan] environment variable or add to config file.")
console.print(f"\nTo create a config file: [cyan]cad-doc --init-config[/cyan]")
sys.exit(1)
console.print(f"Processing: [cyan]{video.name}[/cyan]")
if verbose:
console.print(f" API: {config.api.provider} ({config.api.vision_model or 'default'})")
console.print(f" Whisper: {config.processing.whisper_model}")
cfg.frame_extraction.mode = frame_mode
cfg.frame_extraction.interval_seconds = frame_interval
cfg.transcription.model = whisper_model
cfg.vision.provider = vision_provider
if vision_model:
cfg.vision.model = vision_model
# Default output directory
if output is None:
output = video.parent / f"{video.stem}_docs"
output.mkdir(parents=True, exist_ok=True)
console.print(f"Output: [cyan]{output}[/cyan]")
# Initialize pipeline
console.print(f"📁 Output: [cyan]{output}[/cyan]\n")
# Create pipeline
pipeline = DocumentationPipeline(
video_path=video,
output_dir=output,
config=cfg,
progress_callback=progress_handler if verbose else None,
)
# Show video info
try:
pipeline = DocumentationPipeline(
video_path=video,
output_dir=output,
config=config,
)
except ValueError as e:
console.print(f"[red]Configuration error:[/red] {e}")
sys.exit(1)
# Frames only mode
if frames_only:
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
console=console,
) as progress:
progress.add_task("Extracting frames...", total=None)
frames = pipeline.extract_frames()
console.print(f"[green]✓[/green] Extracted {len(frames)} frames to {output / 'frames'}")
return
# Full pipeline
metadata = pipeline.get_video_metadata()
console.print(f" Duration: {metadata.duration:.1f}s | "
f"Resolution: {metadata.width}x{metadata.height} | "
f"Audio: {'' if metadata.has_audio else ''}")
except Exception:
pass
console.print()
# Run pipeline with progress
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
console=console,
disable=verbose, # Disable progress bar if verbose (use callback instead)
) as progress:
# Step 1: Extract frames
task1 = progress.add_task("[cyan]Step 1/4:[/cyan] Extracting frames...", total=None)
frames = pipeline.extract_frames()
progress.update(task1, description=f"[green]✓[/green] Extracted {len(frames)} frames")
progress.remove_task(task1)
# Step 2: Transcribe
task2 = progress.add_task("[cyan]Step 2/4:[/cyan] Transcribing audio...", total=None)
transcript = pipeline.transcribe_audio()
seg_count = len(transcript.segments) if transcript.segments else 0
progress.update(task2, description=f"[green]✓[/green] Transcribed {seg_count} segments")
progress.remove_task(task2)
if verbose and transcript.full_text:
console.print(Panel(
transcript.full_text[:500] + ("..." if len(transcript.full_text) > 500 else ""),
title="Transcript Preview",
border_style="dim"
))
# Step 3: Analyze
task3 = progress.add_task("[cyan]Step 3/4:[/cyan] Analyzing components...", total=None)
analysis = pipeline.analyze_components(frames, transcript)
comp_count = len(analysis.components)
progress.update(task3, description=f"[green]✓[/green] Identified {comp_count} components")
progress.remove_task(task3)
if verbose and analysis.components:
console.print("\n[bold]Components found:[/bold]")
for c in analysis.components:
console.print(f"{c.name} ({c.material or 'material unknown'})")
# Step 4: Generate documentation
task4 = progress.add_task("[cyan]Step 4/4:[/cyan] Generating documentation...", total=None)
doc_path = pipeline.generate_documentation(
analysis,
atomizer_hints=atomizer_hints or config.output.include_atomizer_hints,
bom=bom or config.output.include_bom,
task = progress.add_task("Processing...", total=100)
def update_progress(p: PipelineProgress):
progress.update(task, completed=int(p.progress * 100), description=p.message)
if verbose:
progress_handler(p)
pipeline.progress_callback = update_progress
result = pipeline.run(
frames_only=frames_only,
skip_transcription=skip_transcription,
atomizer_hints=atomizer_hints,
bom=bom,
pdf=pdf,
)
progress.update(task4, description=f"[green]✓[/green] Documentation generated")
progress.remove_task(task4)
# Generate PDF if requested
if pdf:
console.print("[cyan]Generating PDF...[/cyan]")
try:
pdf_path = pipeline.generate_pdf(doc_path)
console.print(f"[green]✓[/green] PDF: {pdf_path}")
except Exception as e:
console.print(f"[yellow]Warning:[/yellow] PDF generation failed: {e}")
# Summary
# Print results
console.print()
console.print(Panel.fit(
f"[bold green]Documentation complete![/bold green]\n\n"
f"📄 [cyan]{doc_path}[/cyan]\n"
f"📊 {len(analysis.components)} components documented\n"
f"🖼️ {len(frames)} frames extracted",
title="Summary",
border_style="green"
))
# Show atomizer hints summary if generated
if (atomizer_hints or config.output.include_atomizer_hints) and analysis.atomizer_hints:
hints = analysis.atomizer_hints
if hints.objectives or hints.constraints:
console.print("\n[bold]Atomizer Hints:[/bold]")
for obj in hints.objectives[:3]:
console.print(f" 🎯 {obj['direction'].capitalize()} {obj['name']}")
for constraint in hints.constraints[:3]:
console.print(f" 📏 {constraint['type']}: {constraint['value']}")
if result.success:
console.print(Panel.fit(
f"[bold green]✓ Documentation generated successfully![/bold green]\n\n"
f"📊 Frames extracted: {result.frames_extracted}\n"
f"🔧 Components found: {result.components_found}\n"
f"🎤 Audio duration: {result.transcript_duration:.1f}s",
title="Results",
border_style="green"
))
# Show output files
table = Table(title="Output Files", show_header=True)
table.add_column("Type", style="cyan")
table.add_column("Path")
if result.documentation_path:
table.add_row("Documentation", str(result.documentation_path))
if result.atomizer_hints_path:
table.add_row("Atomizer Hints", str(result.atomizer_hints_path))
if result.bom_path:
table.add_row("BOM", str(result.bom_path))
if result.pdf_path:
table.add_row("PDF", str(result.pdf_path))
console.print(table)
# Show warnings
if result.warnings:
console.print("\n[yellow]Warnings:[/yellow]")
for warning in result.warnings:
console.print(f" ⚠️ {warning}")
else:
console.print(Panel.fit(
f"[bold red]✗ Pipeline failed[/bold red]\n\n" +
"\n".join(result.errors),
title="Error",
border_style="red"
))
sys.exit(1)
@cli.command()
@click.argument("video", type=click.Path(exists=True, path_type=Path))
@click.option("-o", "--output", type=click.Path(path_type=Path), help="Output directory")
@click.option("--mode", type=click.Choice(["interval", "scene", "hybrid"]),
              default="hybrid", help="Extraction mode")
@click.option("--interval", default=2.0, help="Seconds between frames")
@click.option("--threshold", default=0.3, help="Scene change threshold")
def frames(video: Path, output: Path | None, mode: str, interval: float, threshold: float):
    """
    Extract frames from a video without full processing.
    Useful for previewing what frames will be analyzed.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is kept verbatim.
    from .video_processor import VideoProcessor
    from .config import FrameExtractionConfig

    print_banner()
    console.print(f"\n📹 Extracting frames from: [cyan]{video}[/cyan]")

    # Without -o/--output, write next to the video in "<stem>_frames".
    if output is None:
        output = video.parent / f"{video.stem}_frames"

    processor = VideoProcessor(
        video,
        output,
        FrameExtractionConfig(
            mode=mode,
            interval_seconds=interval,
            scene_threshold=threshold,
        ),
    )
    with console.status("Extracting frames..."):
        extracted = processor.extract_frames()
    console.print(f"\n[green]✓[/green] Extracted {len(extracted)} frames to {output}")

    # Summarise the extracted frames, capped so long videos stay readable.
    preview_limit = 20
    table = Table(title="Extracted Frames")
    table.add_column("#", style="dim")
    for heading in ("Timestamp", "File", "Scene Score"):
        table.add_column(heading)

    for row_number, frame in enumerate(extracted[:preview_limit], start=1):
        table.add_row(
            str(row_number),
            f"{frame.timestamp:.2f}s",
            frame.path.name,
            # NOTE(review): truthiness test, so a score of exactly 0.0 is
            # shown as "-" just like a missing score - confirm that is intended.
            f"{frame.scene_score:.2f}" if frame.scene_score else "-",
        )

    hidden_count = len(extracted) - preview_limit
    if hidden_count > 0:
        table.add_row("...", f"({hidden_count} more)", "", "")

    console.print(table)
@cli.command()
@click.argument("video", type=click.Path(exists=True, path_type=Path))
@click.option("--model", default="base", help="Whisper model size")
@click.option("--output", "-o", type=click.Path(path_type=Path), help="Output file")
def transcribe(video: Path, model: str, output: Path | None):
    """
    Transcribe audio from a video file.
    Outputs transcript with timestamps.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is kept verbatim.
    #
    # With -o/--output the full transcript is written to that file, one
    # "[start - end] text" line per segment. Without it, only the first ten
    # segments are previewed on the console.
    from rich.markup import escape

    from .audio_analyzer import AudioAnalyzer
    from .config import TranscriptionConfig

    print_banner()
    console.print(f"\n🎤 Transcribing: [cyan]{video}[/cyan]")

    analyzer = AudioAnalyzer(video, TranscriptionConfig(model=model))
    with console.status(f"Transcribing with Whisper ({model})..."):
        transcript = analyzer.transcribe()

    segments = transcript.segments
    console.print(f"\n[green]✓[/green] Transcribed {len(segments)} segments")
    console.print(f" Duration: {transcript.duration:.1f}s")
    console.print(f" Language: {transcript.language}\n")

    if output is not None:
        # Write UTF-8 explicitly: transcripts routinely contain non-ASCII
        # text, and the platform default encoding may not be able to
        # represent it.
        output.write_text(
            "\n".join(
                f"[{seg.start:.2f} - {seg.end:.2f}] {seg.text}" for seg in segments
            ),
            encoding="utf-8",
        )
        console.print(f"Saved to: {output}")
        return

    preview_limit = 10
    for seg in segments[:preview_limit]:
        # Escape the spoken text so bracketed words in it are printed
        # literally instead of being parsed as Rich markup tags.
        console.print(f"[dim][{seg.start:.1f}s][/dim] {escape(seg.text)}")
    if len(segments) > preview_limit:
        console.print(f"[dim]... ({len(segments) - preview_limit} more segments)[/dim]")
@cli.command()
@click.option("--output", "-o", type=click.Path(path_type=Path), help="Output config file")
def init(output: Path | None):
    """
    Create a default configuration file.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is kept verbatim.
    from .config import Config

    # Without -o/--output, fall back to a dotfile in the current directory.
    output = Path(".cad-documenter.json") if output is None else output

    # A default-constructed Config is written out as the starting point.
    Config().to_file(output)

    console.print(f"[green]✓[/green] Created config file: {output}")
    for hint in (
        "\nEdit this file to customize:",
        " - Vision model and provider",
        " - Whisper transcription settings",
        " - Frame extraction parameters",
    ):
        console.print(hint)
@cli.command()
@click.argument("video", type=click.Path(exists=True, path_type=Path))
def info(video: Path):
    """
    Show information about a video file.
    """
    # NOTE: the docstring above is rendered by click as the command's help
    # text, so it is kept verbatim.
    import tempfile

    from .video_processor import VideoProcessor

    # VideoProcessor takes a second path argument (the `frames` command in
    # this file passes its output directory there). Only metadata is read
    # here, so hand it the platform's temp directory rather than a
    # hard-coded "/tmp", which does not exist on every OS.
    processor = VideoProcessor(video, Path(tempfile.gettempdir()))
    metadata = processor.get_metadata()

    size_mb = video.stat().st_size / 1024 / 1024

    table = Table(title=f"Video Info: {video.name}")
    table.add_column("Property", style="cyan")
    table.add_column("Value")
    table.add_row("Duration", f"{metadata.duration:.2f}s ({metadata.duration/60:.1f} min)")
    table.add_row("Resolution", f"{metadata.width}x{metadata.height}")
    table.add_row("FPS", f"{metadata.fps:.2f}")
    table.add_row("Codec", metadata.codec)
    # Both branches used to be the same empty string, so the row never
    # showed whether an audio track exists.
    table.add_row("Has Audio", "Yes" if metadata.has_audio else "No")
    table.add_row("File Size", f"{size_mb:.1f} MB")
    console.print(table)
# Hidden alias kept so existing "main ..." invocations continue to work.
@cli.command(name="main", hidden=True)
@click.argument("video", type=click.Path(exists=True, path_type=Path))
@click.option("-o", "--output", type=click.Path(path_type=Path))
@click.option("--frames-only", is_flag=True)
@click.option("--atomizer-hints", is_flag=True)
@click.option("--bom", is_flag=True)
@click.option("--pdf", is_flag=True)
@click.option("--frame-interval", default=2.0)
@click.option("--whisper-model", default="base")
@click.pass_context
def main_legacy(ctx, video, output, frames_only, atomizer_hints, bom, pdf, frame_interval, whisper_model):
    """Legacy entry point - redirects to process command."""
    # Forward exactly the options this legacy command accepts; `process`
    # parameters that are not listed here are not passed explicitly.
    forwarded = {
        "video": video,
        "output": output,
        "frames_only": frames_only,
        "atomizer_hints": atomizer_hints,
        "bom": bom,
        "pdf": pdf,
        "frame_interval": frame_interval,
        "whisper_model": whisper_model,
    }
    ctx.invoke(process, **forwarded)
def main():
    """Run the command-line interface.

    Thin wrapper that calls the ``cli`` click group defined in this module.
    """
    cli()
if __name__ == "__main__":