Add --export-only mode for Clawdbot hybrid workflow

- New flag: cad-doc project process --export-only
- Extracts frames and transcribes locally (no API calls)
- Exports to clawdbot_export/ folder for Clawdbot processing
- New 'exported' status for videos awaiting Clawdbot
- Updated CLI status display to show exported count
2026-01-28 01:59:17 +00:00
parent fce07c75e9
commit 6890ca0283
3 changed files with 162 additions and 18 deletions
--- a/src/cad_documenter/cli_project.py
+++ b/src/cad_documenter/cli_project.py
@@ -77,11 +77,13 @@ def add(project_path: Path, video: Path, no_copy: bool):
@project.command()
@click.argument("project_path", type=click.Path(exists=True, path_type=Path))
@click.option("--all", "process_all", is_flag=True, help="Reprocess all videos, not just pending")
-def process(project_path: Path, process_all: bool):
+@click.option("--export-only", is_flag=True, help="Only extract frames and transcribe (skip vision API). Outputs for Clawdbot processing.")
+def process(project_path: Path, process_all: bool, export_only: bool):
    """Process pending videos in the project.
    
    Example:
        cad-doc project process ./my-project
+        cad-doc project process ./my-project --export-only  # For Clawdbot hybrid workflow
    """
    try:
        proj = Project.load(project_path)
@@ -92,7 +94,8 @@ def process(project_path: Path, process_all: bool):
            console.print("[yellow]No pending videos to process[/yellow]")
            return
        
-        console.print(f"Processing {len(pending)} video(s)...")
+        mode_text = "[cyan](export-only mode)[/cyan] " if export_only else ""
+        console.print(f"{mode_text}Processing {len(pending)} video(s)...")
        console.print()
        
        processor = IncrementalProcessor(proj, config)
@@ -107,10 +110,19 @@ def process(project_path: Path, process_all: bool):
                progress.update(task, description=msg)
            
            task = progress.add_task("Starting...", total=None)
-            results = processor.process_pending(progress_callback=on_progress)
+            results = processor.process_pending(progress_callback=on_progress, export_only=export_only)
        
        # Show results
        console.print()
+        if export_only:
+            console.print(Panel.fit(
+                f"[bold green]Export complete![/bold green]\n\n"
+                f"Videos extracted: {results['processed']}\n"
+                f"Export folder: [cyan]{proj.project_dir / 'clawdbot_export'}[/cyan]\n\n"
+                f"[dim]Tell Clawdbot: \"Process CAD report for {proj.manifest.name}\"[/dim]",
+                title="Ready for Clawdbot"
+            ))
+        else:
            console.print(Panel.fit(
                f"[bold green]Processing complete![/bold green]\n\n"
                f"Videos processed: {results['processed']}\n"
@@ -135,6 +147,7 @@ def process(project_path: Path, process_all: bool):
            for err in results['errors']:
                console.print(f"  [red]✗[/red] {err['video']}: {err['error']}")
        
+        if not export_only:
            console.print(f"\nRun [cyan]cad-doc project generate {project_path}[/cyan] to create documentation")
        
    except FileNotFoundError as e:
@@ -211,6 +224,7 @@ def status(project_path: Path):
            status_style = {
                "pending": "yellow",
                "processed": "green",
+                "exported": "cyan",
                "error": "red",
            }.get(video.status, "white")
            
@@ -225,7 +239,11 @@ def status(project_path: Path):
        
        # Summary
        console.print()
-        console.print(f"Total videos: {status['total_videos']} ({status['pending']} pending)")
+        exported = status.get('exported', 0)
+        status_parts = [f"{status['pending']} pending"]
+        if exported > 0:
+            status_parts.append(f"{exported} exported")
+        console.print(f"Total videos: {status['total_videos']} ({', '.join(status_parts)})")
        console.print(f"Components: {status['total_components']}")
        console.print(f"Total duration: {status['total_duration']:.1f}s")
        console.print(f"Total frames: {status['total_frames']}")
@@ -234,6 +252,11 @@ def status(project_path: Path):
            console.print()
            console.print(f"[yellow]Run [cyan]cad-doc project process {project_path}[/cyan] to process pending videos[/yellow]")
        
+        if exported > 0:
+            console.print()
+            console.print(f"[cyan]{exported} video(s) ready for Clawdbot processing[/cyan]")
+            console.print(f"[dim]Export folder: {project_path / 'clawdbot_export'}[/dim]")
+        
    except FileNotFoundError as e:
        console.print(f"[red]Error:[/red] {e}")
        raise SystemExit(1)
--- a/src/cad_documenter/incremental.py
+++ b/src/cad_documenter/incremental.py
@@ -63,8 +63,14 @@ class IncrementalProcessor:
        self.apm_client = get_apm_client()
        self.component_matcher = ComponentMatcher(self.apm_client) if self.apm_client.is_available() else None
    
-    def process_pending(self, progress_callback=None) -> dict:
-        """Process all pending videos."""
+    def process_pending(self, progress_callback=None, export_only: bool = False) -> dict:
+        """Process all pending videos.
+        
+        Args:
+            progress_callback: Function to call with progress updates
+            export_only: If True, only extract frames and transcribe (skip vision API).
+                        Outputs to clawdbot_export/ for hybrid workflow.
+        """
        pending = self.project.get_pending_videos()
        
        if not pending:
@@ -80,9 +86,13 @@ class IncrementalProcessor:
        
        for i, video_entry in enumerate(pending):
            if progress_callback:
-                progress_callback(f"Processing {video_entry.filename} ({i+1}/{len(pending)})")
+                mode = "Exporting" if export_only else "Processing"
+                progress_callback(f"{mode} {video_entry.filename} ({i+1}/{len(pending)})")
            
            try:
+                if export_only:
+                    video_results = self.export_video_for_clawdbot(video_entry)
+                else:
                    video_results = self.process_video(video_entry)
                results["processed"] += 1
                results["new_components"].extend(video_results.get("new_components", []))
@@ -161,6 +171,116 @@ class IncrementalProcessor:
            **merge_results
        }
    
+    def export_video_for_clawdbot(self, video_entry: VideoEntry) -> dict:
+        """Export frames and transcript for Clawdbot processing (no API calls).
+        
+        Creates a structured export folder that Clawdbot can process with vision
+        and context awareness.
+        """
+        import json
+        import shutil
+        
+        video_path = self.project.get_video_path(video_entry)
+        
+        # Create export directory
+        export_base = self.project.project_dir / "clawdbot_export"
+        export_dir = export_base / video_path.stem
+        export_frames_dir = export_dir / "frames"
+        export_frames_dir.mkdir(parents=True, exist_ok=True)
+        
+        # Create output dir for this video's frames (in normal location too)
+        video_frames_dir = self.project.frames_dir / video_path.stem
+        video_frames_dir.mkdir(exist_ok=True)
+        
+        # Run pipeline (extraction and transcription only)
+        pipeline = DocumentationPipeline(
+            video_path=video_path,
+            output_dir=video_frames_dir,
+            config=self.config,
+        )
+        
+        # Extract and transcribe (no API calls here)
+        frames = pipeline.extract_frames()
+        transcript = pipeline.transcribe_audio()
+        
+        # Save transcript to project location
+        transcript_file = self.project.transcripts_dir / f"{video_path.stem}.json"
+        self._save_transcript(transcript, transcript_file)
+        
+        # Copy frames to export folder with timestamp names
+        frame_manifest = []
+        for frame in frames:
+            # Format timestamp as HH-MM-SS
+            ts = frame.timestamp
+            hours = int(ts // 3600)
+            minutes = int((ts % 3600) // 60)
+            seconds = int(ts % 60)
+            ts_name = f"{hours:02d}-{minutes:02d}-{seconds:02d}.png"
+            
+            dest = export_frames_dir / ts_name
+            shutil.copy2(frame.path, dest)
+            
+            frame_manifest.append({
+                "file": ts_name,
+                "timestamp": ts,
+                "timestamp_formatted": f"{hours:02d}:{minutes:02d}:{seconds:02d}",
+                "frame_number": frame.frame_number,
+                "scene_score": getattr(frame, 'scene_score', None),
+            })
+        
+        # Save transcript to export folder
+        export_transcript = {
+            "language": getattr(transcript, 'language', 'unknown'),
+            "duration": getattr(transcript, 'duration', 0),
+            "text": transcript.full_text if hasattr(transcript, 'full_text') else str(transcript),
+            "segments": [
+                {
+                    "start": seg.start,
+                    "end": seg.end,
+                    "text": seg.text.strip(),
+                }
+                for seg in transcript.segments
+            ] if hasattr(transcript, 'segments') else []
+        }
+        
+        with open(export_dir / "transcript.json", "w", encoding="utf-8") as f:
+            json.dump(export_transcript, f, indent=2, ensure_ascii=False)
+        
+        # Save metadata
+        metadata = {
+            "source_video": video_entry.filename,
+            "project_name": self.project.manifest.name,
+            "project_description": self.project.manifest.description,
+            "extracted_at": datetime.now().isoformat(),
+            "duration": getattr(transcript, 'duration', 0),
+            "frame_count": len(frames),
+            "frames": frame_manifest,
+            "status": "ready_for_clawdbot",
+        }
+        
+        with open(export_dir / "metadata.json", "w", encoding="utf-8") as f:
+            json.dump(metadata, f, indent=2)
+        
+        # Update video entry (mark as exported, not fully processed)
+        video_entry.status = "exported"
+        video_entry.processed_at = datetime.now().isoformat()
+        video_entry.duration = getattr(transcript, 'duration', 0)
+        video_entry.transcript_file = transcript_file.name
+        video_entry.frames_extracted = len(frames)
+        
+        # Update project totals
+        self.project.manifest.total_frames += len(frames)
+        self.project.manifest.total_duration += video_entry.duration or 0
+        
+        return {
+            "frames": len(frames),
+            "transcript_duration": video_entry.duration,
+            "export_dir": str(export_dir),
+            "new_components": [],
+            "updated_components": [],
+            "changes": [],
+        }
+    
    def detect_changes(self, transcript: Transcript) -> list[ChangeDetection]:
        """Detect explicit changes mentioned in transcript."""
        changes = []
--- a/src/cad_documenter/project.py
+++ b/src/cad_documenter/project.py
@@ -18,7 +18,7 @@ class VideoEntry:
    transcript_file: str | None = None
    frames_extracted: int = 0
    components_found: list[str] = field(default_factory=list)
-    status: Literal["pending", "processed", "error"] = "pending"
+    status: Literal["pending", "processed", "exported", "error"] = "pending"
    error_message: str | None = None


@@ -403,6 +403,7 @@ Edit files in `context/` to give the AI better understanding:
            "total_videos": len(videos),
            "pending": len([v for v in videos if v.status == "pending"]),
            "processed": len([v for v in videos if v.status == "processed"]),
+            "exported": len([v for v in videos if v.status == "exported"]),
            "errors": len([v for v in videos if v.status == "error"]),
            "total_components": len(self.manifest.components),
            "total_duration": self.manifest.total_duration,