Clarify Windows vs Clawdbot responsibilities
Windows KB Capture now:
- Records clips
- Merges video
- Transcribes with Whisper (GPU)
- Finds screenshot triggers with context
- Exports: merged.mp4, transcript.json, metadata.json

Clawdbot (via knowledge-base skill) now:
- Extracts frames at trigger timestamps
- Vision analyzes frames
- Updates KB files
- Organizes images

Removed frame extraction from Windows - that's Mario's job.
Added screenshot_triggers to metadata.json with context for Mario.
This commit is contained in:
@@ -64,18 +64,14 @@ class SessionExporter:
|
||||
self.on_progress("Merging clips...", 0.1)
|
||||
merged_path = self._merge_clips(session_dir, kept_clips, export_dir)
|
||||
|
||||
# Step 2: Transcribe
|
||||
# Step 2: Transcribe (uses local GPU Whisper)
|
||||
self.on_progress("Transcribing audio...", 0.3)
|
||||
transcript = self._transcribe(merged_path, export_dir)
|
||||
|
||||
# Step 3: Extract frames at screenshot triggers
|
||||
self.on_progress("Extracting frames...", 0.7)
|
||||
frames_dir = export_dir / "frames"
|
||||
self._extract_frames(merged_path, transcript, frames_dir)
|
||||
|
||||
# Step 4: Create metadata
|
||||
self.on_progress("Creating metadata...", 0.9)
|
||||
self._create_metadata(session, export_dir, merged_path)
|
||||
# Step 3: Create metadata
|
||||
# Note: Frame extraction is done by Clawdbot using the knowledge-base skill
|
||||
self.on_progress("Creating metadata...", 0.8)
|
||||
self._create_metadata(session, export_dir, merged_path, transcript)
|
||||
|
||||
self.on_progress("Export complete!", 1.0)
|
||||
|
||||
@@ -230,13 +226,43 @@ class SessionExporter:
|
||||
secs = int(seconds % 60)
|
||||
return f"{mins:02d}-{secs:02d}"
|
||||
|
||||
def _find_screenshot_triggers(self, transcript: dict) -> list[dict]:
|
||||
"""Find 'screenshot' triggers in transcript with context."""
|
||||
triggers = []
|
||||
segments = transcript.get("segments", [])
|
||||
|
||||
for i, segment in enumerate(segments):
|
||||
text = segment.get("text", "").lower()
|
||||
if "screenshot" in text:
|
||||
timestamp = segment.get("start", 0)
|
||||
|
||||
# Get context: 2 segments before and after
|
||||
context_segments = segments[max(0, i-2):i+3]
|
||||
context = " ".join(s.get("text", "") for s in context_segments)
|
||||
|
||||
triggers.append({
|
||||
"timestamp": timestamp,
|
||||
"timestamp_formatted": self._format_timestamp(timestamp),
|
||||
"segment_text": segment.get("text", ""),
|
||||
"context": context.strip(),
|
||||
})
|
||||
|
||||
return triggers
|
||||
|
||||
def _create_metadata(
|
||||
self,
|
||||
session: Session,
|
||||
export_dir: Path,
|
||||
merged_path: Path,
|
||||
transcript: dict,
|
||||
) -> None:
|
||||
"""Create metadata.json for Clawdbot."""
|
||||
# Find screenshot triggers for Mario
|
||||
triggers = self._find_screenshot_triggers(transcript)
|
||||
|
||||
# Get video duration
|
||||
duration = self._get_video_duration(merged_path)
|
||||
|
||||
metadata = {
|
||||
"session_id": session.id,
|
||||
"name": session.name,
|
||||
@@ -245,7 +271,8 @@ class SessionExporter:
|
||||
"created_at": session.created_at.isoformat(),
|
||||
"exported_at": datetime.now().isoformat(),
|
||||
"clip_count": session.clip_count,
|
||||
"total_duration": session.total_duration,
|
||||
"total_duration": duration,
|
||||
"status": "pending", # → "processed" after Clawdbot processes
|
||||
"clips": [
|
||||
{
|
||||
"id": clip.id,
|
||||
@@ -254,10 +281,10 @@ class SessionExporter:
|
||||
}
|
||||
for clip in session.kept_clips
|
||||
],
|
||||
"screenshot_triggers": triggers, # Pre-parsed for Mario
|
||||
"files": {
|
||||
"video": "merged.mp4",
|
||||
"transcript": "transcript.json",
|
||||
"frames": "frames/",
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user