diff --git a/src/cad_documenter/cli_project.py b/src/cad_documenter/cli_project.py index 20122b5..7a7ba64 100644 --- a/src/cad_documenter/cli_project.py +++ b/src/cad_documenter/cli_project.py @@ -77,11 +77,13 @@ def add(project_path: Path, video: Path, no_copy: bool): @project.command() @click.argument("project_path", type=click.Path(exists=True, path_type=Path)) @click.option("--all", "process_all", is_flag=True, help="Reprocess all videos, not just pending") -def process(project_path: Path, process_all: bool): +@click.option("--export-only", is_flag=True, help="Only extract frames and transcribe (skip vision API). Outputs for Clawdbot processing.") +def process(project_path: Path, process_all: bool, export_only: bool): """Process pending videos in the project. Example: cad-doc project process ./my-project + cad-doc project process ./my-project --export-only # For Clawdbot hybrid workflow """ try: proj = Project.load(project_path) @@ -92,7 +94,8 @@ def process(project_path: Path, process_all: bool): console.print("[yellow]No pending videos to process[/yellow]") return - console.print(f"Processing {len(pending)} video(s)...") + mode_text = "[cyan](export-only mode)[/cyan] " if export_only else "" + console.print(f"{mode_text}Processing {len(pending)} video(s)...") console.print() processor = IncrementalProcessor(proj, config) @@ -107,18 +110,27 @@ def process(project_path: Path, process_all: bool): progress.update(task, description=msg) task = progress.add_task("Starting...", total=None) - results = processor.process_pending(progress_callback=on_progress) + results = processor.process_pending(progress_callback=on_progress, export_only=export_only) # Show results console.print() - console.print(Panel.fit( - f"[bold green]Processing complete![/bold green]\n\n" - f"Videos processed: {results['processed']}\n" - f"New components: {len(results['new_components'])}\n" - f"Updated components: {len(results['updated_components'])}\n" - f"Changes detected: 
{len(results['changes_detected'])}", - title="Results" - )) + if export_only: + console.print(Panel.fit( + f"[bold green]Export complete![/bold green]\n\n" + f"Videos extracted: {results['processed']}\n" + f"Export folder: [cyan]{proj.project_dir / 'clawdbot_export'}[/cyan]\n\n" + f"[dim]Tell Clawdbot: \"Process CAD report for {proj.manifest.name}\"[/dim]", + title="Ready for Clawdbot" + )) + else: + console.print(Panel.fit( + f"[bold green]Processing complete![/bold green]\n\n" + f"Videos processed: {results['processed']}\n" + f"New components: {len(results['new_components'])}\n" + f"Updated components: {len(results['updated_components'])}\n" + f"Changes detected: {len(results['changes_detected'])}", + title="Results" + )) if results['new_components']: console.print("\n[bold]New components:[/bold]") @@ -135,7 +147,8 @@ def process(project_path: Path, process_all: bool): for err in results['errors']: console.print(f" [red]✗[/red] {err['video']}: {err['error']}") - console.print(f"\nRun [cyan]cad-doc project generate {project_path}[/cyan] to create documentation") + if not export_only: + console.print(f"\nRun [cyan]cad-doc project generate {project_path}[/cyan] to create documentation") except FileNotFoundError as e: console.print(f"[red]Error:[/red] {e}") @@ -211,6 +224,7 @@ def status(project_path: Path): status_style = { "pending": "yellow", "processed": "green", + "exported": "cyan", "error": "red", }.get(video.status, "white") @@ -225,7 +239,11 @@ def status(project_path: Path): # Summary console.print() - console.print(f"Total videos: {status['total_videos']} ({status['pending']} pending)") + exported = status.get('exported', 0) + status_parts = [f"{status['pending']} pending"] + if exported > 0: + status_parts.append(f"{exported} exported") + console.print(f"Total videos: {status['total_videos']} ({', '.join(status_parts)})") console.print(f"Components: {status['total_components']}") console.print(f"Total duration: {status['total_duration']:.1f}s") 
console.print(f"Total frames: {status['total_frames']}") @@ -234,6 +252,11 @@ def status(project_path: Path): console.print() console.print(f"[yellow]Run [cyan]cad-doc project process {project_path}[/cyan] to process pending videos[/yellow]") + if exported > 0: + console.print() + console.print(f"[cyan]{exported} video(s) ready for Clawdbot processing[/cyan]") + console.print(f"[dim]Export folder: {project_path / 'clawdbot_export'}[/dim]") + except FileNotFoundError as e: console.print(f"[red]Error:[/red] {e}") raise SystemExit(1) diff --git a/src/cad_documenter/incremental.py b/src/cad_documenter/incremental.py index dc55cf6..ffbf7d4 100644 --- a/src/cad_documenter/incremental.py +++ b/src/cad_documenter/incremental.py @@ -63,8 +63,14 @@ class IncrementalProcessor: self.apm_client = get_apm_client() self.component_matcher = ComponentMatcher(self.apm_client) if self.apm_client.is_available() else None - def process_pending(self, progress_callback=None) -> dict: - """Process all pending videos.""" + def process_pending(self, progress_callback=None, export_only: bool = False) -> dict: + """Process all pending videos. + + Args: + progress_callback: Function to call with progress updates + export_only: If True, only extract frames and transcribe (skip vision API). + Outputs to clawdbot_export/ for hybrid workflow. 
+ """ pending = self.project.get_pending_videos() if not pending: @@ -80,10 +86,14 @@ class IncrementalProcessor: for i, video_entry in enumerate(pending): if progress_callback: - progress_callback(f"Processing {video_entry.filename} ({i+1}/{len(pending)})") + mode = "Exporting" if export_only else "Processing" + progress_callback(f"{mode} {video_entry.filename} ({i+1}/{len(pending)})") try: - video_results = self.process_video(video_entry) + if export_only: + video_results = self.export_video_for_clawdbot(video_entry) + else: + video_results = self.process_video(video_entry) results["processed"] += 1 results["new_components"].extend(video_results.get("new_components", [])) results["updated_components"].extend(video_results.get("updated_components", [])) @@ -161,6 +171,142 @@ class IncrementalProcessor: **merge_results }
+    def export_video_for_clawdbot(self, video_entry: VideoEntry) -> dict:
+        """Export frames and transcript for Clawdbot processing (no API calls).
+
+        Extracts frames and transcribes the audio, then writes
+        ``clawdbot_export/<video stem>/`` inside the project directory with a
+        ``frames/`` folder, ``transcript.json`` and ``metadata.json``. The
+        transcript is also saved to the project's transcripts directory, and
+        the video entry is marked ``"exported"``.
+
+        Args:
+            video_entry: Manifest entry of the video to export; updated in place.
+
+        Returns:
+            Dict with ``frames``, ``transcript_duration`` and ``export_dir``,
+            plus empty ``new_components``/``updated_components``/``changes``
+            lists so callers can aggregate it like a full processing result.
+        """
+        import json
+        import shutil
+
+        video_path = self.project.get_video_path(video_entry)
+
+        # Create export directory
+        export_dir = self.project.project_dir / "clawdbot_export" / video_path.stem
+        export_frames_dir = export_dir / "frames"
+        export_frames_dir.mkdir(parents=True, exist_ok=True)
+
+        # Frames are also extracted to the normal per-video location.
+        video_frames_dir = self.project.frames_dir / video_path.stem
+        video_frames_dir.mkdir(parents=True, exist_ok=True)
+
+        # Run pipeline (extraction and transcription only)
+        pipeline = DocumentationPipeline(
+            video_path=video_path,
+            output_dir=video_frames_dir,
+            config=self.config,
+        )
+        frames = pipeline.extract_frames()
+        transcript = pipeline.transcribe_audio()
+        duration = getattr(transcript, 'duration', 0)
+
+        # Save transcript to project location
+        transcript_file = self.project.transcripts_dir / f"{video_path.stem}.json"
+        self._save_transcript(transcript, transcript_file)
+
+        # Copy frames to export folder with timestamp names
+        frame_manifest = []
+        used_names = set()
+        for frame in frames:
+            # Format timestamp as HH-MM-SS
+            ts = frame.timestamp
+            hours = int(ts // 3600)
+            minutes = int((ts % 3600) // 60)
+            seconds = int(ts % 60)
+            stem = f"{hours:02d}-{minutes:02d}-{seconds:02d}"
+
+            # Names have one-second resolution, so frames sharing a second
+            # would overwrite each other. The first keeps HH-MM-SS.png;
+            # later ones get a _2, _3, ... suffix.
+            ts_name = f"{stem}.png"
+            duplicate = 1
+            while ts_name in used_names:
+                duplicate += 1
+                ts_name = f"{stem}_{duplicate}.png"
+            used_names.add(ts_name)
+
+            shutil.copy2(frame.path, export_frames_dir / ts_name)
+
+            frame_manifest.append({
+                "file": ts_name,
+                "timestamp": ts,
+                "timestamp_formatted": f"{hours:02d}:{minutes:02d}:{seconds:02d}",
+                "frame_number": frame.frame_number,
+                "scene_score": getattr(frame, 'scene_score', None),
+            })
+
+        # Save transcript to export folder
+        export_transcript = {
+            "language": getattr(transcript, 'language', 'unknown'),
+            "duration": duration,
+            "text": transcript.full_text if hasattr(transcript, 'full_text') else str(transcript),
+            "segments": [
+                {
+                    "start": seg.start,
+                    "end": seg.end,
+                    "text": seg.text.strip(),
+                }
+                for seg in getattr(transcript, 'segments', [])
+            ],
+        }
+
+        with open(export_dir / "transcript.json", "w", encoding="utf-8") as f:
+            json.dump(export_transcript, f, indent=2, ensure_ascii=False)
+
+        # One timestamp so metadata.json and the manifest entry agree.
+        now = datetime.now().isoformat()
+
+        # Save metadata
+        metadata = {
+            "source_video": video_entry.filename,
+            "project_name": self.project.manifest.name,
+            "project_description": self.project.manifest.description,
+            "extracted_at": now,
+            "duration": duration,
+            "frame_count": len(frames),
+            "frames": frame_manifest,
+            "status": "ready_for_clawdbot",
+        }
+
+        # ensure_ascii=False matches transcript.json: non-ASCII project
+        # names/descriptions stay readable instead of \u-escaped.
+        with open(export_dir / "metadata.json", "w", encoding="utf-8") as f:
+            json.dump(metadata, f, indent=2, ensure_ascii=False)
+
+        # Update video entry (mark as exported, not fully processed)
+        video_entry.status = "exported"
+        video_entry.processed_at = now
+        video_entry.duration = duration
+        video_entry.transcript_file = transcript_file.name
+        video_entry.frames_extracted = len(frames)
+
+        # Update project totals
+        # NOTE(review): totals are incremented on every run, so re-exporting
+        # a video presumably counts it twice - confirm against process_video.
+        self.project.manifest.total_frames += len(frames)
+        self.project.manifest.total_duration += duration or 0
+
+        return {
+            "frames": len(frames),
+            "transcript_duration": duration,
+            "export_dir": str(export_dir),
+            "new_components": [],
+            "updated_components": [],
+            "changes": [],
+        }
+
 def detect_changes(self, transcript: Transcript) -> list[ChangeDetection]: """Detect explicit changes mentioned in transcript.""" changes = [] diff --git a/src/cad_documenter/project.py b/src/cad_documenter/project.py index 85f907a..a2917b6 100644 --- a/src/cad_documenter/project.py +++ b/src/cad_documenter/project.py @@ -18,7 +18,7 @@ class VideoEntry: transcript_file: str | None = None frames_extracted: int = 0 components_found: list[str] = field(default_factory=list) - status: Literal["pending", "processed", "error"] = "pending" + status: Literal["pending", "processed", "exported", "error"] = "pending" error_message: str | None = None @@ -403,6 +403,7 @@ Edit files in `context/` to give 
the AI better understanding: "total_videos": len(videos), "pending": len([v for v in videos if v.status == "pending"]), "processed": len([v for v in videos if v.status == "processed"]), + "exported": len([v for v in videos if v.status == "exported"]), "errors": len([v for v in videos if v.status == "error"]), "total_components": len(self.manifest.components), "total_duration": self.manifest.total_duration,