Add --export-only mode for Clawdbot hybrid workflow

- New flag: cad-doc project process --export-only
- Extracts frames and transcribes locally (no API calls)
- Exports to clawdbot_export/ folder for Clawdbot processing
- New 'exported' status for videos awaiting Clawdbot
- Updated CLI status display to show exported count
This commit is contained in:
Mario Lavoie
2026-01-28 01:59:17 +00:00
parent fce07c75e9
commit 6890ca0283
3 changed files with 162 additions and 18 deletions

View File

@@ -77,11 +77,13 @@ def add(project_path: Path, video: Path, no_copy: bool):
@project.command()
@click.argument("project_path", type=click.Path(exists=True, path_type=Path))
@click.option("--all", "process_all", is_flag=True, help="Reprocess all videos, not just pending")
def process(project_path: Path, process_all: bool):
@click.option("--export-only", is_flag=True, help="Only extract frames and transcribe (skip vision API). Outputs for Clawdbot processing.")
def process(project_path: Path, process_all: bool, export_only: bool):
"""Process pending videos in the project.
Example:
cad-doc project process ./my-project
cad-doc project process ./my-project --export-only # For Clawdbot hybrid workflow
"""
try:
proj = Project.load(project_path)
@@ -92,7 +94,8 @@ def process(project_path: Path, process_all: bool):
console.print("[yellow]No pending videos to process[/yellow]")
return
console.print(f"Processing {len(pending)} video(s)...")
mode_text = "[cyan](export-only mode)[/cyan] " if export_only else ""
console.print(f"{mode_text}Processing {len(pending)} video(s)...")
console.print()
processor = IncrementalProcessor(proj, config)
@@ -107,10 +110,19 @@ def process(project_path: Path, process_all: bool):
progress.update(task, description=msg)
task = progress.add_task("Starting...", total=None)
results = processor.process_pending(progress_callback=on_progress)
results = processor.process_pending(progress_callback=on_progress, export_only=export_only)
# Show results
console.print()
if export_only:
console.print(Panel.fit(
f"[bold green]Export complete![/bold green]\n\n"
f"Videos extracted: {results['processed']}\n"
f"Export folder: [cyan]{proj.project_dir / 'clawdbot_export'}[/cyan]\n\n"
f"[dim]Tell Clawdbot: \"Process CAD report for {proj.manifest.name}\"[/dim]",
title="Ready for Clawdbot"
))
else:
console.print(Panel.fit(
f"[bold green]Processing complete![/bold green]\n\n"
f"Videos processed: {results['processed']}\n"
@@ -135,6 +147,7 @@ def process(project_path: Path, process_all: bool):
for err in results['errors']:
console.print(f" [red]✗[/red] {err['video']}: {err['error']}")
if not export_only:
console.print(f"\nRun [cyan]cad-doc project generate {project_path}[/cyan] to create documentation")
except FileNotFoundError as e:
@@ -211,6 +224,7 @@ def status(project_path: Path):
status_style = {
"pending": "yellow",
"processed": "green",
"exported": "cyan",
"error": "red",
}.get(video.status, "white")
@@ -225,7 +239,11 @@ def status(project_path: Path):
# Summary
console.print()
console.print(f"Total videos: {status['total_videos']} ({status['pending']} pending)")
exported = status.get('exported', 0)
status_parts = [f"{status['pending']} pending"]
if exported > 0:
status_parts.append(f"{exported} exported")
console.print(f"Total videos: {status['total_videos']} ({', '.join(status_parts)})")
console.print(f"Components: {status['total_components']}")
console.print(f"Total duration: {status['total_duration']:.1f}s")
console.print(f"Total frames: {status['total_frames']}")
@@ -234,6 +252,11 @@ def status(project_path: Path):
console.print()
console.print(f"[yellow]Run [cyan]cad-doc project process {project_path}[/cyan] to process pending videos[/yellow]")
if exported > 0:
console.print()
console.print(f"[cyan]{exported} video(s) ready for Clawdbot processing[/cyan]")
console.print(f"[dim]Export folder: {project_path / 'clawdbot_export'}[/dim]")
except FileNotFoundError as e:
console.print(f"[red]Error:[/red] {e}")
raise SystemExit(1)

View File

@@ -63,8 +63,14 @@ class IncrementalProcessor:
self.apm_client = get_apm_client()
self.component_matcher = ComponentMatcher(self.apm_client) if self.apm_client.is_available() else None
def process_pending(self, progress_callback=None) -> dict:
"""Process all pending videos."""
def process_pending(self, progress_callback=None, export_only: bool = False) -> dict:
"""Process all pending videos.
Args:
progress_callback: Function to call with progress updates
export_only: If True, only extract frames and transcribe (skip vision API).
Outputs to clawdbot_export/ for hybrid workflow.
"""
pending = self.project.get_pending_videos()
if not pending:
@@ -80,9 +86,13 @@ class IncrementalProcessor:
for i, video_entry in enumerate(pending):
if progress_callback:
progress_callback(f"Processing {video_entry.filename} ({i+1}/{len(pending)})")
mode = "Exporting" if export_only else "Processing"
progress_callback(f"{mode} {video_entry.filename} ({i+1}/{len(pending)})")
try:
if export_only:
video_results = self.export_video_for_clawdbot(video_entry)
else:
video_results = self.process_video(video_entry)
results["processed"] += 1
results["new_components"].extend(video_results.get("new_components", []))
@@ -161,6 +171,116 @@ class IncrementalProcessor:
**merge_results
}
def export_video_for_clawdbot(self, video_entry: VideoEntry) -> dict:
    """Export frames and transcript for Clawdbot processing (no API calls).

    Runs only frame extraction and audio transcription for the video, then
    writes a structured folder at
    ``<project_dir>/clawdbot_export/<video stem>/`` containing:

    - ``frames/``: a copy of every extracted frame, named by its timestamp
    - ``transcript.json``: language, duration, full text and segments
    - ``metadata.json``: source video, project info and the frame manifest

    The transcript is also saved to the project's transcripts directory, the
    video entry is marked ``"exported"`` and the project totals are updated.

    Args:
        video_entry: Manifest entry of the video to export. Mutated in place
            (status, processed_at, duration, transcript_file,
            frames_extracted).

    Returns:
        A dict with ``frames`` (frame count), ``transcript_duration``,
        ``export_dir`` (path as a string) and empty ``new_components``,
        ``updated_components`` and ``changes`` lists.
    """
    import json
    import shutil

    video_path = self.project.get_video_path(video_entry)
    stem = video_path.stem

    # Export directory handed over to Clawdbot.
    export_dir = self.project.project_dir / "clawdbot_export" / stem
    export_frames_dir = export_dir / "frames"
    export_frames_dir.mkdir(parents=True, exist_ok=True)

    # Frames are also extracted into the normal per-video project location.
    video_frames_dir = self.project.frames_dir / stem
    video_frames_dir.mkdir(parents=True, exist_ok=True)

    # Run pipeline (extraction and transcription only, no vision step).
    pipeline = DocumentationPipeline(
        video_path=video_path,
        output_dir=video_frames_dir,
        config=self.config,
    )
    frames = pipeline.extract_frames()
    transcript = pipeline.transcribe_audio()
    duration = getattr(transcript, "duration", 0)

    # Save transcript to project location.
    transcript_file = self.project.transcripts_dir / f"{stem}.json"
    self._save_transcript(transcript, transcript_file)

    # Copy frames to the export folder under timestamp-based names.
    frame_manifest = []
    used_names = set()
    for frame in frames:
        ts = frame.timestamp
        hours = int(ts // 3600)
        minutes = int((ts % 3600) // 60)
        seconds = int(ts % 60)

        # Names have whole-second resolution, so two frames from the same
        # second would otherwise overwrite each other. Keep the plain
        # HH-MM-SS name for the first frame and add a numeric suffix only
        # when a collision occurs.
        base_name = f"{hours:02d}-{minutes:02d}-{seconds:02d}"
        ts_name = f"{base_name}.png"
        suffix = 1
        while ts_name in used_names:
            suffix += 1
            ts_name = f"{base_name}_{suffix}.png"
        used_names.add(ts_name)

        # NOTE(review): the ".png" extension is hard-coded; confirm the
        # pipeline always writes PNG frames.
        shutil.copy2(frame.path, export_frames_dir / ts_name)

        frame_manifest.append({
            "file": ts_name,
            "timestamp": ts,
            "timestamp_formatted": f"{hours:02d}:{minutes:02d}:{seconds:02d}",
            "frame_number": frame.frame_number,
            "scene_score": getattr(frame, "scene_score", None),
        })

    # Save transcript to export folder; fall back to str() / empty segments
    # when the transcript object lacks the expected attributes.
    if hasattr(transcript, "segments"):
        segments = [
            {"start": seg.start, "end": seg.end, "text": seg.text.strip()}
            for seg in transcript.segments
        ]
    else:
        segments = []
    export_transcript = {
        "language": getattr(transcript, "language", "unknown"),
        "duration": duration,
        "text": transcript.full_text if hasattr(transcript, "full_text") else str(transcript),
        "segments": segments,
    }
    with open(export_dir / "transcript.json", "w", encoding="utf-8") as f:
        json.dump(export_transcript, f, indent=2, ensure_ascii=False)

    # One timestamp for both the metadata and the manifest entry.
    now = datetime.now().isoformat()

    # Save metadata.
    metadata = {
        "source_video": video_entry.filename,
        "project_name": self.project.manifest.name,
        "project_description": self.project.manifest.description,
        "extracted_at": now,
        "duration": duration,
        "frame_count": len(frames),
        "frames": frame_manifest,
        "status": "ready_for_clawdbot",
    }
    with open(export_dir / "metadata.json", "w", encoding="utf-8") as f:
        # ensure_ascii=False keeps non-ASCII project text readable, matching
        # how transcript.json is written.
        json.dump(metadata, f, indent=2, ensure_ascii=False)

    # Update video entry (mark as exported, not fully processed).
    video_entry.status = "exported"
    video_entry.processed_at = now
    video_entry.duration = duration
    video_entry.transcript_file = transcript_file.name
    video_entry.frames_extracted = len(frames)

    # Update project totals.
    # NOTE(review): totals are only ever incremented here; confirm that
    # re-exporting an already-counted video cannot double-count.
    self.project.manifest.total_frames += len(frames)
    self.project.manifest.total_duration += video_entry.duration or 0

    return {
        "frames": len(frames),
        "transcript_duration": video_entry.duration,
        "export_dir": str(export_dir),
        "new_components": [],
        "updated_components": [],
        "changes": [],
    }
def detect_changes(self, transcript: Transcript) -> list[ChangeDetection]:
"""Detect explicit changes mentioned in transcript."""
changes = []

View File

@@ -18,7 +18,7 @@ class VideoEntry:
transcript_file: str | None = None
frames_extracted: int = 0
components_found: list[str] = field(default_factory=list)
status: Literal["pending", "processed", "error"] = "pending"
status: Literal["pending", "processed", "exported", "error"] = "pending"
error_message: str | None = None
@@ -403,6 +403,7 @@ Edit files in `context/` to give the AI better understanding:
"total_videos": len(videos),
"pending": len([v for v in videos if v.status == "pending"]),
"processed": len([v for v in videos if v.status == "processed"]),
"exported": len([v for v in videos if v.status == "exported"]),
"errors": len([v for v in videos if v.status == "error"]),
"total_components": len(self.manifest.components),
"total_duration": self.manifest.total_duration,