diff --git a/src/cad_documenter/export.py b/src/cad_documenter/export.py
index d6d8452..d12b6eb 100644
--- a/src/cad_documenter/export.py
+++ b/src/cad_documenter/export.py
@@ -64,18 +64,14 @@ class SessionExporter:
         self.on_progress("Merging clips...", 0.1)
         merged_path = self._merge_clips(session_dir, kept_clips, export_dir)
 
-        # Step 2: Transcribe
+        # Step 2: Transcribe (uses local GPU Whisper)
         self.on_progress("Transcribing audio...", 0.3)
         transcript = self._transcribe(merged_path, export_dir)
 
-        # Step 3: Extract frames at screenshot triggers
-        self.on_progress("Extracting frames...", 0.7)
-        frames_dir = export_dir / "frames"
-        self._extract_frames(merged_path, transcript, frames_dir)
-
-        # Step 4: Create metadata
-        self.on_progress("Creating metadata...", 0.9)
-        self._create_metadata(session, export_dir, merged_path)
+        # Step 3: Create metadata
+        # Note: Frame extraction is done by Clawdbot using the knowledge-base skill
+        self.on_progress("Creating metadata...", 0.8)
+        self._create_metadata(session, export_dir, merged_path, transcript)
 
         self.on_progress("Export complete!", 1.0)
 
@@ -230,13 +226,43 @@ class SessionExporter:
         secs = int(seconds % 60)
         return f"{mins:02d}-{secs:02d}"
 
+    def _find_screenshot_triggers(self, transcript: dict) -> list[dict]:
+        """Find 'screenshot' triggers in transcript with context."""
+        triggers = []
+        segments = transcript.get("segments", [])
+
+        for i, segment in enumerate(segments):
+            text = segment.get("text", "").lower()
+            if "screenshot" in text:
+                timestamp = segment.get("start", 0)
+
+                # Get context: 2 segments before and after
+                context_segments = segments[max(0, i-2):i+3]
+                context = " ".join(s.get("text", "") for s in context_segments)
+
+                triggers.append({
+                    "timestamp": timestamp,
+                    "timestamp_formatted": self._format_timestamp(timestamp),
+                    "segment_text": segment.get("text", ""),
+                    "context": context.strip(),
+                })
+
+        return triggers
+
     def _create_metadata(
         self,
         session: Session,
         export_dir: Path,
         merged_path: Path,
+        transcript: dict,
     ) -> None:
         """Create metadata.json for Clawdbot."""
+        # Find screenshot triggers for Mario
+        triggers = self._find_screenshot_triggers(transcript)
+
+        # Get video duration
+        duration = self._get_video_duration(merged_path)
+
         metadata = {
             "session_id": session.id,
             "name": session.name,
@@ -245,7 +271,8 @@ class SessionExporter:
             "created_at": session.created_at.isoformat(),
             "exported_at": datetime.now().isoformat(),
             "clip_count": session.clip_count,
-            "total_duration": session.total_duration,
+            "total_duration": duration,
+            "status": "pending",  # → "processed" after Clawdbot processes
             "clips": [
                 {
                     "id": clip.id,
@@ -254,10 +281,10 @@ class SessionExporter:
                 }
                 for clip in session.kept_clips
             ],
+            "screenshot_triggers": triggers,  # Pre-parsed for Mario
             "files": {
                 "video": "merged.mp4",
                 "transcript": "transcript.json",
-                "frames": "frames/",
             },
         }
 