Initial project scaffold - Phase 1 MVP structure
Core modules: - cli.py: Command-line interface with Click - pipeline.py: Main orchestrator - video_processor.py: Frame extraction with ffmpeg - audio_analyzer.py: Whisper transcription - vision_analyzer.py: Component detection (placeholder) - doc_generator.py: Markdown + PDF output Also includes: - pyproject.toml with uv/hatch config - Prompts for AI analysis - Basic tests - ROADMAP.md with 4-week plan
This commit is contained in:
3
src/cad_documenter/__init__.py
Normal file
3
src/cad_documenter/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""CAD-Documenter: Video walkthrough → Complete engineering documentation."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
104
src/cad_documenter/audio_analyzer.py
Normal file
104
src/cad_documenter/audio_analyzer.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""Audio analysis module - transcription via Whisper."""
|
||||
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
|
||||
@dataclass
class TranscriptSegment:
    """A segment of transcribed audio."""

    start: float  # segment start time, seconds from the beginning of the audio
    end: float  # segment end time, seconds
    text: str  # transcribed text for this time span
|
||||
|
||||
|
||||
@dataclass
class Transcript:
    """Full transcript with segments."""

    segments: list[TranscriptSegment]  # time-ordered transcription segments
    full_text: str  # the complete transcription as one string

    def get_text_at(self, timestamp: float, window: float = 5.0) -> str:
        """Return the transcript text spoken near *timestamp*.

        Collects every segment overlapping [timestamp - window,
        timestamp + window] and joins them with single spaces.
        """
        lo = timestamp - window
        hi = timestamp + window
        return " ".join(
            seg.text
            for seg in self.segments
            if seg.start <= hi and seg.end >= lo
        )
|
||||
|
||||
|
||||
class AudioAnalyzer:
    """Handles audio transcription using Whisper."""

    def __init__(self, video_path: Path, model: str = "base"):
        self.video_path = video_path  # source video containing the audio track
        self.model = model  # Whisper model size (tiny/base/small/medium/large)

    def transcribe(self) -> Transcript:
        """
        Transcribe audio from video using Whisper.

        Returns:
            Transcript object with segments and full text

        Raises:
            subprocess.CalledProcessError: if ffmpeg fails to extract audio
        """
        # Create the temp file path up front; the handle is closed immediately
        # so ffmpeg can (re)write the file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            audio_path = Path(f.name)

        try:
            # Extract mono 16 kHz PCM audio -- the format Whisper expects.
            cmd = [
                "ffmpeg", "-y",
                "-i", str(self.video_path),
                "-vn", "-acodec", "pcm_s16le",
                "-ar", "16000", "-ac", "1",
                str(audio_path)
            ]
            # check=True so a failed extraction surfaces here instead of as a
            # cryptic Whisper error on an empty/invalid WAV file.
            subprocess.run(cmd, capture_output=True, check=True)

            # Imported lazily so the package remains importable without whisper.
            import whisper
            model = whisper.load_model(self.model)
            result = model.transcribe(str(audio_path), word_timestamps=True)

            segments = [
                TranscriptSegment(
                    start=seg["start"],
                    end=seg["end"],
                    text=seg["text"].strip(),
                )
                for seg in result.get("segments", [])
            ]

            return Transcript(
                segments=segments,
                full_text=result.get("text", "").strip(),
            )

        finally:
            # Always remove the temp audio file, even if extraction or
            # transcription fails (previously a failed ffmpeg run leaked it).
            audio_path.unlink(missing_ok=True)

    def extract_keywords(self, transcript: Transcript) -> list[str]:
        """Extract likely component names and technical terms.

        Scans for indicator phrases ("this is the", ...) and captures the
        first few words after every occurrence of each phrase.

        Returns:
            De-duplicated keywords, in order of first appearance.
        """
        # Simple keyword extraction - can be enhanced with NLP
        keywords = []
        indicator_phrases = [
            "this is the", "this is a", "here we have",
            "the main", "called the", "known as",
            "this part", "this component", "this assembly"
        ]

        text_lower = transcript.full_text.lower()
        for phrase in indicator_phrases:
            # Walk every occurrence, not just the first one.
            idx = text_lower.find(phrase)
            while idx != -1:
                start = idx + len(phrase)
                # Take the first few words after the phrase.
                after = transcript.full_text[start:start + 50]
                words = after.strip().split()[:3]
                if words:
                    keywords.append(" ".join(words).strip(",.;:"))
                idx = text_lower.find(phrase, start)

        # dict.fromkeys de-duplicates while keeping first-seen order;
        # list(set(...)) gave a nondeterministic order across runs.
        return list(dict.fromkeys(keywords))
|
||||
86
src/cad_documenter/cli.py
Normal file
86
src/cad_documenter/cli.py
Normal file
@@ -0,0 +1,86 @@
|
||||
"""CAD-Documenter CLI - Main entry point."""
|
||||
|
||||
import click
|
||||
from pathlib import Path
|
||||
from rich.console import Console
|
||||
|
||||
from .pipeline import DocumentationPipeline
|
||||
|
||||
console = Console()
|
||||
|
||||
|
||||
@click.command()
@click.argument("video", type=click.Path(exists=True, path_type=Path))
@click.option("-o", "--output", type=click.Path(path_type=Path), help="Output directory")
@click.option("--frames-only", is_flag=True, help="Only extract frames, skip documentation")
@click.option("--atomizer-hints", is_flag=True, help="Generate Atomizer FEA hints")
@click.option("--bom", is_flag=True, help="Generate Bill of Materials")
@click.option("--pdf", is_flag=True, help="Generate PDF via Atomaste Report Standard")
@click.option("--frame-interval", default=2.0, help="Seconds between frame extractions")
@click.option("--whisper-model", default="base", help="Whisper model size (tiny/base/small/medium/large)")
@click.version_option()
def main(
    video: Path,
    output: Path | None,
    frames_only: bool,
    atomizer_hints: bool,
    bom: bool,
    pdf: bool,
    frame_interval: float,
    whisper_model: str,
):
    """
    Generate engineering documentation from a CAD walkthrough video.

    VIDEO: Path to the video file (.mp4, .mov, .avi, etc.)
    """
    # Plain string: the old version used an f-string with no placeholders.
    console.print("[bold blue]CAD-Documenter[/bold blue] v0.1.0")
    console.print(f"Processing: [cyan]{video}[/cyan]")

    # Default output directory: sibling of the video, named <stem>_docs.
    if output is None:
        output = video.parent / f"{video.stem}_docs"

    output.mkdir(parents=True, exist_ok=True)

    # Build the pipeline; all stages write under `output`.
    pipeline = DocumentationPipeline(
        video_path=video,
        output_dir=output,
        frame_interval=frame_interval,
        whisper_model=whisper_model,
    )

    # --frames-only short-circuits transcription, analysis, and generation.
    if frames_only:
        console.print("[yellow]Extracting frames only...[/yellow]")
        pipeline.extract_frames()
        console.print(f"[green]✓[/green] Frames saved to {output / 'frames'}")
        return

    # Full pipeline: frames → transcript → component analysis → docs.
    console.print("[yellow]Step 1/4:[/yellow] Extracting frames...")
    frames = pipeline.extract_frames()
    console.print(f"  [green]✓[/green] Extracted {len(frames)} frames")

    console.print("[yellow]Step 2/4:[/yellow] Transcribing audio...")
    transcript = pipeline.transcribe_audio()
    console.print(f"  [green]✓[/green] Transcribed {len(transcript.segments)} segments")

    console.print("[yellow]Step 3/4:[/yellow] Analyzing components...")
    analysis = pipeline.analyze_components(frames, transcript)
    console.print(f"  [green]✓[/green] Identified {len(analysis.components)} components")

    console.print("[yellow]Step 4/4:[/yellow] Generating documentation...")
    doc_path = pipeline.generate_documentation(analysis, atomizer_hints=atomizer_hints, bom=bom)
    console.print(f"  [green]✓[/green] Documentation saved to {doc_path}")

    # Optional PDF rendering of the generated markdown.
    if pdf:
        console.print("[yellow]Generating PDF...[/yellow]")
        pdf_path = pipeline.generate_pdf(doc_path)
        console.print(f"  [green]✓[/green] PDF saved to {pdf_path}")

    console.print(f"\n[bold green]Done![/bold green] Output: {output}")


if __name__ == "__main__":
    main()
|
||||
218
src/cad_documenter/doc_generator.py
Normal file
218
src/cad_documenter/doc_generator.py
Normal file
@@ -0,0 +1,218 @@
|
||||
"""Documentation generator - produces markdown and PDF output."""
|
||||
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader, BaseLoader
|
||||
|
||||
from .vision_analyzer import ComponentAnalysis, Component
|
||||
|
||||
|
||||
# Default Jinja2 markdown template embedded in code (overridable by files on
# disk via DocGenerator's template_dir). Context keys it expects: analysis,
# timestamp, atomizer_hints, bom, component_names, materials.
DEFAULT_TEMPLATE = '''# {{ analysis.assembly_name }} - Technical Documentation

**Generated:** {{ timestamp }}
**Source:** Video walkthrough documentation

---

## Executive Summary

{{ analysis.summary }}

---

## Components

{% for component in analysis.components %}
### {{ loop.index }}. {{ component.name }}

{% if component.description %}
{{ component.description }}
{% endif %}

{% if component.function %}
- **Function:** {{ component.function }}
{% endif %}
{% if component.material %}
- **Material:** {{ component.material }}
{% endif %}
{% if component.part_number %}
- **Part Number:** {{ component.part_number }}
{% endif %}

{% if component.features %}
**Key Features:**
{% for feature in component.features %}
- {{ feature }}
{% endfor %}
{% endif %}

{% if component.best_frame %}
![{{ component.name }}]({{ component.best_frame.path.name }})
{% endif %}

{% if component.transcript_excerpt %}
> *From walkthrough:* "{{ component.transcript_excerpt }}"
{% endif %}

---

{% endfor %}

{% if bom %}
## Bill of Materials

| Item | P/N | Name | Qty | Material | Notes |
|------|-----|------|-----|----------|-------|
{% for component in analysis.components %}
| {{ loop.index }} | {{ component.part_number or 'TBD' }} | {{ component.name }} | 1 | {{ component.material or 'TBD' }} | {{ component.function }} |
{% endfor %}

{% endif %}

{% if analysis.assembly_notes %}
## Assembly Notes

{{ analysis.assembly_notes }}

{% endif %}

{% if atomizer_hints %}
## Atomizer FEA Hints

Based on the video walkthrough, the following optimization parameters are suggested:

```json
{
"model_understanding": {
"components": {{ component_names | tojson }},
"materials_mentioned": {{ materials | tojson }}
},
"suggested_study": {
"objectives": [
{"name": "mass", "direction": "minimize"}
],
"constraints_likely": []
}
}
```

{% endif %}

---

## Raw Transcript

<details>
<summary>Click to expand full transcript</summary>

{{ analysis.raw_transcript }}

</details>

---

*Documentation generated by CAD-Documenter*
'''
|
||||
|
||||
|
||||
class DocGenerator:
    """Generates documentation from analysis results."""

    def __init__(self, output_dir: Path, template_dir: Path | None = None):
        self.output_dir = output_dir
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Setup Jinja environment: load templates from disk when a template
        # directory is supplied, otherwise only the embedded default is used.
        if template_dir and template_dir.exists():
            self.env = Environment(loader=FileSystemLoader(template_dir))
        else:
            self.env = Environment(loader=BaseLoader())

    def generate(
        self,
        analysis: ComponentAnalysis,
        atomizer_hints: bool = False,
        bom: bool = False,
        template_name: str | None = None,
    ) -> Path:
        """Generate markdown documentation.

        Args:
            analysis: Component analysis results to render.
            atomizer_hints: Include the Atomizer FEA hints section.
            bom: Include the Bill of Materials table.
            template_name: Optional template (loaded as ``<name>.md.j2`` from
                the template directory); defaults to the embedded template.

        Returns:
            Path to the written ``documentation.md``.
        """
        # Load template
        if template_name:
            template = self.env.get_template(f"{template_name}.md.j2")
        else:
            template = self.env.from_string(DEFAULT_TEMPLATE)

        # Prepare template context. Materials are sorted so repeated runs
        # produce identical output (set iteration order is nondeterministic).
        context = {
            "analysis": analysis,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
            "atomizer_hints": atomizer_hints,
            "bom": bom,
            "component_names": [c.name for c in analysis.components],
            "materials": sorted({c.material for c in analysis.components if c.material}),
        }

        # Render
        content = template.render(**context)

        # Write as UTF-8 explicitly: the default encoding is platform-dependent
        # and the template contains non-ASCII characters.
        output_path = self.output_dir / "documentation.md"
        output_path.write_text(content, encoding="utf-8")

        return output_path

    def generate_pdf(self, markdown_path: Path) -> Path:
        """
        Generate PDF from markdown using Atomaste Report Standard.

        Requires the atomaste-reports skill/Typst to be available; falls back
        to pandoc otherwise.

        Raises:
            RuntimeError: if the underlying converter exits non-zero.
        """
        import subprocess

        pdf_path = markdown_path.with_suffix(".pdf")

        # Try to use Atomaste Report Standard if available,
        # otherwise fall back to pandoc.
        # TODO(review): make this machine-specific path configurable.
        build_script = Path("/home/papa/Atomaste/Templates/Atomaste_Report_Standard/scripts/build-report.py")
        if build_script.exists():
            cmd = ["python3", str(build_script), str(markdown_path), "-o", str(pdf_path)]
        else:
            cmd = ["pandoc", str(markdown_path), "-o", str(pdf_path)]

        try:
            subprocess.run(cmd, capture_output=True, check=True)
        except subprocess.CalledProcessError as e:
            # Chain the cause and surface the converter's stderr, which
            # capture_output would otherwise swallow entirely.
            raise RuntimeError(f"PDF generation failed: {e}\n{e.stderr}") from e

        return pdf_path

    def generate_atomizer_hints(self, analysis: ComponentAnalysis) -> Path:
        """Generate standalone Atomizer hints JSON file.

        Returns:
            Path to the written ``atomizer_hints.json``.
        """
        import json

        hints = {
            "model_understanding": {
                "assembly_name": analysis.assembly_name,
                "components": [c.name for c in analysis.components],
                # Sorted for deterministic output across runs.
                "materials_mentioned": sorted({c.material for c in analysis.components if c.material}),
                "functions": {c.name: c.function for c in analysis.components if c.function},
            },
            "suggested_spec": {
                "objectives": [
                    {"name": "mass", "direction": "minimize"}
                ],
                "parameters_likely": [],
                "constraints_likely": [],
            },
            # Placeholder: to be filled once transcript mining is implemented.
            "transcript_highlights": [],
        }

        output_path = self.output_dir / "atomizer_hints.json"
        output_path.write_text(json.dumps(hints, indent=2), encoding="utf-8")

        return output_path
|
||||
64
src/cad_documenter/pipeline.py
Normal file
64
src/cad_documenter/pipeline.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""Main documentation pipeline orchestrator."""
|
||||
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .video_processor import VideoProcessor, FrameInfo
|
||||
from .audio_analyzer import AudioAnalyzer, Transcript
|
||||
from .vision_analyzer import VisionAnalyzer, ComponentAnalysis
|
||||
from .doc_generator import DocGenerator
|
||||
|
||||
|
||||
@dataclass
class PipelineConfig:
    """Pipeline configuration."""

    # NOTE(review): DocumentationPipeline currently declares these settings as
    # its own fields rather than consuming this config -- confirm intent.
    frame_interval: float = 2.0  # seconds between extracted frames
    whisper_model: str = "base"  # Whisper model size (tiny/base/small/medium/large)
    vision_model: str = "gpt-4o"  # or local model
|
||||
|
||||
|
||||
@dataclass
class DocumentationPipeline:
    """Orchestrates the full documentation pipeline.

    Thin facade over the four stage processors (video, audio, vision, docs);
    each public method delegates to exactly one of them.
    """

    video_path: Path  # input walkthrough video
    output_dir: Path  # root directory for all generated artifacts
    frame_interval: float = 2.0  # seconds between extracted frames
    whisper_model: str = "base"  # Whisper model size

    def __post_init__(self):
        # Wire up the stage processors; frames land in <output_dir>/frames.
        self.video_processor = VideoProcessor(self.video_path, self.output_dir / "frames")
        self.audio_analyzer = AudioAnalyzer(self.video_path, self.whisper_model)
        self.vision_analyzer = VisionAnalyzer()
        self.doc_generator = DocGenerator(self.output_dir)

    def extract_frames(self) -> list[FrameInfo]:
        """Extract key frames from video."""
        return self.video_processor.extract_frames(interval=self.frame_interval)

    def transcribe_audio(self) -> Transcript:
        """Transcribe audio track."""
        return self.audio_analyzer.transcribe()

    def analyze_components(
        self, frames: list[FrameInfo], transcript: Transcript
    ) -> ComponentAnalysis:
        """Analyze frames + transcript to identify components."""
        return self.vision_analyzer.analyze(frames, transcript)

    def generate_documentation(
        self,
        analysis: ComponentAnalysis,
        atomizer_hints: bool = False,
        bom: bool = False,
    ) -> Path:
        """Generate markdown documentation."""
        return self.doc_generator.generate(
            analysis,
            atomizer_hints=atomizer_hints,
            bom=bom,
        )

    def generate_pdf(self, markdown_path: Path) -> Path:
        """Generate PDF from markdown using Atomaste Report Standard."""
        return self.doc_generator.generate_pdf(markdown_path)
|
||||
112
src/cad_documenter/video_processor.py
Normal file
112
src/cad_documenter/video_processor.py
Normal file
@@ -0,0 +1,112 @@
|
||||
"""Video processing module - frame extraction and scene detection."""
|
||||
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class FrameInfo:
    """Information about an extracted frame."""

    path: Path  # location of the extracted JPEG on disk
    timestamp: float  # approximate position in the video, seconds
    frame_number: int  # zero-based index in extraction order
|
||||
|
||||
|
||||
class VideoProcessor:
    """Handles video frame extraction using ffmpeg."""

    def __init__(self, video_path: Path, output_dir: Path):
        self.video_path = video_path  # source video file
        self.output_dir = output_dir  # destination for extracted frames
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def get_duration(self) -> float:
        """Get video duration in seconds.

        Raises:
            subprocess.CalledProcessError: if ffprobe fails (e.g. bad file).
        """
        cmd = [
            "ffprobe", "-v", "quiet",
            "-print_format", "json",
            "-show_format",
            str(self.video_path)
        ]
        # check=True: without it, an unreadable file yields empty stdout and
        # json.loads fails with a far more confusing error.
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
        return float(data["format"]["duration"])

    def extract_frames(self, interval: float = 2.0) -> list[FrameInfo]:
        """
        Extract frames at regular intervals.

        Args:
            interval: Seconds between frame extractions

        Returns:
            List of FrameInfo objects for extracted frames

        Raises:
            subprocess.CalledProcessError: if ffmpeg fails.
        """
        # Remove frames left over from a previous run so the glob below only
        # sees frames produced by this invocation (a prior run with a smaller
        # interval would otherwise leak extra frames into the result).
        for stale in self.output_dir.glob("frame_*.jpg"):
            stale.unlink()

        # Use ffmpeg's fps filter to sample one frame per `interval` seconds.
        output_pattern = self.output_dir / "frame_%04d.jpg"
        cmd = [
            "ffmpeg", "-y",
            "-i", str(self.video_path),
            "-vf", f"fps=1/{interval}",
            "-q:v", "2",  # High quality JPEG
            str(output_pattern)
        ]
        subprocess.run(cmd, capture_output=True, check=True)

        # Collect extracted frames; timestamps are approximate (the fps
        # filter samples starting near t=0, one frame per interval).
        frames = []
        for i, frame_path in enumerate(sorted(self.output_dir.glob("frame_*.jpg"))):
            frames.append(FrameInfo(
                path=frame_path,
                timestamp=i * interval,
                frame_number=i
            ))

        return frames

    def extract_audio(self, output_path: Path | None = None) -> Path:
        """Extract audio track from video.

        Args:
            output_path: Target WAV path; defaults to ``audio.wav`` next to
                the frames directory.

        Returns:
            Path to the extracted mono 16 kHz WAV file.

        Raises:
            subprocess.CalledProcessError: if ffmpeg fails.
        """
        if output_path is None:
            output_path = self.output_dir.parent / "audio.wav"

        cmd = [
            "ffmpeg", "-y",
            "-i", str(self.video_path),
            "-vn",  # No video
            "-acodec", "pcm_s16le",
            "-ar", "16000",  # 16kHz for Whisper
            "-ac", "1",  # Mono
            str(output_path)
        ]
        subprocess.run(cmd, capture_output=True, check=True)
        return output_path

    def detect_scene_changes(self, threshold: float = 0.3) -> list[float]:
        """
        Detect scene changes in video.

        Returns list of timestamps where significant visual changes occur.
        """
        cmd = [
            "ffmpeg", "-i", str(self.video_path),
            "-vf", f"select='gt(scene,{threshold})',showinfo",
            "-f", "null", "-"
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        # Parse scene change timestamps from ffmpeg's showinfo output,
        # which is written to stderr.
        timestamps = []
        for line in result.stderr.split("\n"):
            if "pts_time:" in line:
                parts = line.split("pts_time:")
                if len(parts) > 1:
                    timestamps.append(float(parts[1].split()[0]))

        return timestamps
|
||||
111
src/cad_documenter/vision_analyzer.py
Normal file
111
src/cad_documenter/vision_analyzer.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""Vision analysis module - component detection and feature extraction."""
|
||||
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from .video_processor import FrameInfo
|
||||
from .audio_analyzer import Transcript
|
||||
|
||||
|
||||
@dataclass
class Component:
    """A detected component from the CAD model."""

    name: str  # human-readable component name
    description: str  # prose description of the component
    function: str = ""  # what the component does in the assembly
    material: str = ""  # material, if mentioned in the walkthrough
    features: list[str] = field(default_factory=list)  # notable design features
    best_frame: FrameInfo | None = None  # clearest frame showing this component
    transcript_excerpt: str = ""  # quote from the walkthrough about this part
    part_number: str = ""  # For Part Manager integration
|
||||
|
||||
|
||||
@dataclass
class ComponentAnalysis:
    """Complete analysis results."""

    assembly_name: str  # name extracted from the walkthrough narration
    summary: str  # executive summary of the assembly
    components: list[Component]  # all components identified
    assembly_notes: str = ""  # assembly/installation guidance from narration
    raw_transcript: str = ""  # full transcript text for reference
|
||||
|
||||
|
||||
class VisionAnalyzer:
    """Analyzes frames to identify components and features."""

    def __init__(self, model: str = "gpt-4o"):
        self.model = model  # vision model identifier

    def analyze(
        self, frames: list[FrameInfo], transcript: Transcript
    ) -> ComponentAnalysis:
        """
        Analyze frames and transcript to identify components.

        Correlates what is shown in the frames with what is said in the
        narration to build the complete analysis.
        """
        # MVP multi-modal approach: send key frames to a vision model with
        # transcript context and ask it to identify components, correlating
        # visual and verbal information.
        # Placeholder implementation - will be enhanced with actual AI calls.
        found = self._identify_components(frames, transcript)

        return ComponentAnalysis(
            assembly_name=self._extract_assembly_name(transcript),
            summary=self._generate_summary(found, transcript),
            components=found,
            assembly_notes=self._extract_assembly_notes(transcript),
            raw_transcript=transcript.full_text,
        )

    def _identify_components(
        self, frames: list[FrameInfo], transcript: Transcript
    ) -> list[Component]:
        """Identify individual components from frames + transcript."""
        # TODO: Implement vision API calls.
        # Empty until Phase 1 lands the actual implementation.
        return []

    def _generate_summary(
        self, components: list[Component], transcript: Transcript
    ) -> str:
        """Generate executive summary of the assembly."""
        # TODO: Implement with LLM
        return f"Assembly documentation generated from video walkthrough. {len(components)} components identified."

    def _extract_assembly_name(self, transcript: Transcript) -> str:
        """Try to extract assembly name from transcript."""
        lowered = transcript.full_text.lower()
        # Phrases that commonly introduce the assembly by name.
        for marker in ("this is the", "presenting the", "looking at the", "reviewing the"):
            pos = lowered.find(marker)
            if pos == -1:
                continue
            start = pos + len(marker)
            # Take up to 50 chars after the marker, cut at the first period.
            candidate = transcript.full_text[start:start + 50].strip().split(".")[0]
            return candidate.strip()
        return "Untitled Assembly"

    def _extract_assembly_notes(self, transcript: Transcript) -> str:
        """Extract assembly-related notes from transcript."""
        # Segments mentioning any assembly verb are treated as instructions.
        verbs = ("assemble", "install", "mount", "attach", "connect")
        matched = [
            seg.text
            for seg in transcript.segments
            if any(verb in seg.text.lower() for verb in verbs)
        ]
        return " ".join(matched) if matched else ""

    def analyze_single_frame(self, frame: FrameInfo, context: str = "") -> dict:
        """
        Analyze a single frame for components and features.

        Returns dict with detected components, features, and confidence.
        """
        # TODO: Implement with vision API
        return {"components": [], "features": [], "confidence": 0.0}
|
||||
Reference in New Issue
Block a user