fix: Windows recording + pause button + Load Video feature

recorder.py: Fixed Windows FFmpeg handling
- Use CTRL_BREAK_EVENT instead of stdin 'q' to stop recording
- Use NtSuspendProcess/NtResumeProcess for pause/resume
- Add stderr reader thread to prevent blocking
- Better error handling and status messages
- Check if recording actually started before returning success

gui_capture.py: Added Load Video feature
- New 'Load' button in header to import existing videos
- Copies video to session folder and runs transcription
- Supports mp4, mkv, avi, mov, webm formats

Fixes:
- Record button now properly enables Pause button
- Recording stops cleanly on Windows
- Can load pre-recorded videos (from OBS, etc.)
2026-02-10 18:14:01 +00:00
parent 9b24478f04
commit d998a9a2b1
2 changed files with 227 additions and 42 deletions
--- a/src/cad_documenter/gui_capture.py
+++ b/src/cad_documenter/gui_capture.py
@@ -190,6 +190,18 @@ class KBCaptureGUI:
            command=self._browse_folder,
        ).pack(side="right")
        
+        # Load video button
+        CTkButton(
+            header,
+            text="📂 Load",
+            width=60,
+            height=32,
+            font=("", 10),
+            fg_color=COLORS["bg_card"],
+            hover_color=COLORS["bg_elevated"],
+            command=self._load_video,
+        ).pack(side="right", padx=(0, 8))
+        
        # Timer card
        timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10)
        timer_frame.pack(fill="x", pady=(0, 12))
@@ -498,6 +510,70 @@ class KBCaptureGUI:
        
        name_entry.bind("<Return>", lambda e: create())
    
+    def _load_video(self):
+        """Import an existing video file into a new session and transcribe it on a background thread."""
+        if not self.app:
+            messagebox.showwarning("No Folder", "Select a projects folder first")
+            return
+        
+        if self.app.state != AppState.IDLE:  # only allowed while idle
+            messagebox.showwarning("Busy", "Stop current recording first")
+            return
+        
+        project = self.project_menu.get()
+        if project.startswith("("):  # presumably placeholder menu entries start with "(" - confirm
+            messagebox.showwarning("No Project", "Select a project first")
+            return
+        
+        # Ask for video file
+        video_path = filedialog.askopenfilename(
+            title="Select Video to Process",
+            filetypes=[
+                ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
+                ("All files", "*.*"),
+            ],
+        )
+        
+        if not video_path:  # empty result: nothing was selected
+            return
+        
+        video_path = Path(video_path)
+        if not video_path.exists():
+            messagebox.showerror("Error", "File not found")
+            return
+        
+        name = self.name_entry.get().strip() or video_path.stem  # fall back to the file's stem when no name is entered
+        session_type = SessionType.DESIGN if self.type_var.get() == "design" else SessionType.ANALYSIS
+        
+        # Create the session, then copy the video into its folder
+        import shutil  # imported locally, at point of use
+        session = self.app.session_manager.start_session(name, project, session_type)  # NOTE(review): `session` is not used below
+        session_dir = self.app.session_manager.get_session_dir()
+        
+        # Copy video to session folder
+        dest_video = session_dir / "recording.mp4"  # NOTE(review): always named .mp4, even for .mkv/.avi/.mov/.webm sources
+        self.status_label.configure(text="Copying video...", text_color=COLORS["orange"])
+        self.window.update()  # presumably forces a redraw so the status shows before the synchronous copy
+        
+        try:
+            shutil.copy2(video_path, dest_video)
+        except Exception as e:
+            messagebox.showerror("Error", f"Failed to copy video: {e}")
+            self.app.session_manager.cancel_session()  # presumably discards the session started above - confirm
+            return
+        
+        # Now transcribe
+        self.status_label.configure(text="Transcribing...", text_color=COLORS["orange"])
+        self.record_btn.configure(state="disabled")  # NOTE(review): not re-enabled in this method
+        self.window.update()
+        
+        # Run transcription in background
+        def do_transcribe():
+            self.app._transcribe(dest_video)
+        
+        self.app.state = AppState.TRANSCRIBING  # state is set before the worker thread starts
+        threading.Thread(target=do_transcribe, daemon=True).start()
+    
    def _toggle_recording(self):
        """Start or stop recording."""
        if not self.app:
--- a/src/cad_documenter/recorder.py
+++ b/src/cad_documenter/recorder.py
@@ -9,6 +9,8 @@ import subprocess
 import threading
 import time
 import sys
+import os
+import signal
 from pathlib import Path
 from dataclasses import dataclass
 from typing import Optional, Callable
@@ -50,21 +52,29 @@ class ScreenRecorder:
        self.total_paused: float = 0.0
        self.output_path: Optional[Path] = None
        self.on_status = on_status or (lambda x: None)
+        self._stderr_thread: Optional[threading.Thread] = None
+        self._last_error: str = ""
    
    def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]:
        """Build FFmpeg command."""
        cmd = ["ffmpeg", "-y"]
        
        if sys.platform == "win32":
-            # Windows: gdigrab for screen
+            # Windows: gdigrab for screen capture
            cmd.extend([
                "-f", "gdigrab",
                "-framerate", str(config.framerate),
+                "-draw_mouse", "1",
                "-i", "desktop",
            ])
            
-            # Audio: dshow
-            audio_device = config.audio_device or "Microphone Array"
+            # Audio: dshow - try to find a working device
+            audio_device = config.audio_device
+            if not audio_device:
+                devices = self.list_audio_devices()
+                audio_device = devices[0] if devices else None
+            
+            if audio_device:
                cmd.extend([
                    "-f", "dshow",
                    "-i", f"audio={audio_device}",
@@ -84,13 +94,32 @@ class ScreenRecorder:
            "-c:v", config.video_codec,
            "-preset", config.preset,
            "-crf", str(config.crf),
-            "-c:a", config.audio_codec,
-            "-b:a", "128k",
-            str(config.output_path),
+            "-pix_fmt", "yuv420p",  # Ensure compatibility
        ])
        
+        # Only add audio codec if we have audio input
+        if sys.platform != "win32" or config.audio_device or self.list_audio_devices():
+            cmd.extend([
+                "-c:a", config.audio_codec,
+                "-b:a", "128k",
+            ])
+        
+        cmd.append(str(config.output_path))
+        
        return cmd
    
+    def _read_stderr(self):
+        """Read the process's stderr in background to prevent blocking; keeps only the latest line in self._last_error."""
+        if not self.process or not self.process.stderr:  # nothing to read from
+            return
+        try:
+            for line in self.process.stderr:
+                if isinstance(line, bytes):  # decode when the pipe yields bytes
+                    line = line.decode('utf-8', errors='ignore')
+                self._last_error = line.strip()  # overwritten on every line, so only the most recent one survives
+        except:  # NOTE(review): bare except silently hides any failure here; consider narrowing
+            pass
+    
    def start(self, config: RecordingConfig) -> bool:
        """Start recording."""
        if self.is_recording:
@@ -101,15 +130,36 @@ class ScreenRecorder:
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        
        cmd = self._get_ffmpeg_cmd(config)
+        self.on_status(f"Starting: {' '.join(cmd[:6])}...")
        
        try:
+            # On Windows, use different process creation flags
+            if sys.platform == "win32":
+                # CREATE_NEW_PROCESS_GROUP allows sending CTRL_BREAK_EVENT
                self.process = subprocess.Popen(
                    cmd,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
-                creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0,
+                    creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW,
                )
+            else:
+                self.process = subprocess.Popen(
+                    cmd,
+                    stdin=subprocess.PIPE,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                )
+            
+            # Start stderr reader thread
+            self._stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
+            self._stderr_thread.start()
+            
+            # Wait a moment to see if FFmpeg crashes immediately
+            time.sleep(0.5)
+            if self.process.poll() is not None:
+                self.on_status(f"FFmpeg failed: {self._last_error}")
+                return False
            
            self.is_recording = True
            self.is_paused = False
@@ -120,10 +170,10 @@ class ScreenRecorder:
            return True
            
        except FileNotFoundError:
-            self.on_status("FFmpeg not found")
+            self.on_status("FFmpeg not found - install from ffmpeg.org")
            return False
        except Exception as e:
-            self.on_status(f"Failed: {e}")
+            self.on_status(f"Failed to start: {e}")
            return False
    
    def pause(self) -> bool:
@@ -140,11 +190,20 @@ class ScreenRecorder:
                import ctypes
                kernel32 = ctypes.windll.kernel32
                handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
-                kernel32.DebugActiveProcess(self.process.pid)
+                if handle:
+                    # Use NtSuspendProcess for cleaner suspension
+                    ntdll = ctypes.windll.ntdll
+                    ntdll.NtSuspendProcess(handle)
+                    kernel32.CloseHandle(handle)
                    self.on_status("Paused")
-            except:
-                self.on_status("Paused (soft)")
                else:
+                    self.on_status("Paused (soft)")
+            except Exception as e:
+                self.on_status(f"Paused (soft): {e}")
+        else:
+            # On Linux, send SIGSTOP
+            if self.process:
+                self.process.send_signal(signal.SIGSTOP)
            self.on_status("Paused")
        
        return True
@@ -160,14 +219,22 @@ class ScreenRecorder:
        self.is_paused = False
        self.pause_start = None
        
-        # Resume FFmpeg process on Windows
+        # Resume FFmpeg process
        if sys.platform == "win32" and self.process:
            try:
                import ctypes
                kernel32 = ctypes.windll.kernel32
-                kernel32.DebugActiveProcessStop(self.process.pid)
+                handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
+                if handle:
+                    ntdll = ctypes.windll.ntdll
+                    ntdll.NtResumeProcess(handle)
+                    kernel32.CloseHandle(handle)
            except:
                pass
+        else:
+            # On Linux, send SIGCONT
+            if self.process:
+                self.process.send_signal(signal.SIGCONT)
        
        self.on_status("Recording resumed")
        return True
@@ -177,32 +244,62 @@ class ScreenRecorder:
        if not self.is_recording or not self.process:
            return None
        
-        # If paused, add final pause duration
-        if self.is_paused and self.pause_start:
+        # If paused, resume first so we can stop properly
+        if self.is_paused:
+            if self.pause_start:
                self.total_paused += time.time() - self.pause_start
-            # Resume first so we can stop properly
+            
            if sys.platform == "win32":
                try:
                    import ctypes
                    kernel32 = ctypes.windll.kernel32
-                    kernel32.DebugActiveProcessStop(self.process.pid)
+                    handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
+                    if handle:
+                        ntdll = ctypes.windll.ntdll
+                        ntdll.NtResumeProcess(handle)
+                        kernel32.CloseHandle(handle)
                except:
                    pass
+            else:
+                self.process.send_signal(signal.SIGCONT)
+            
+            time.sleep(0.2)  # Give it a moment to resume
        
        try:
-            # Send 'q' to FFmpeg
+            if sys.platform == "win32":
+                # On Windows, send CTRL_BREAK_EVENT to gracefully stop FFmpeg
+                # This works because we used CREATE_NEW_PROCESS_GROUP
+                try:
+                    os.kill(self.process.pid, signal.CTRL_BREAK_EVENT)
+                    self.process.wait(timeout=5)
+                except (subprocess.TimeoutExpired, OSError):
+                    # Fallback: try stdin 'q'
+                    try:
                        if self.process.stdin:
-                self.process.stdin.write(b"q")
+                            self.process.stdin.write(b"q\n")
                            self.process.stdin.flush()
-            
+                        self.process.wait(timeout=5)
+                    except:
+                        self.process.terminate()
+                        self.process.wait(timeout=3)
+            else:
+                # On Linux, send SIGINT (Ctrl+C equivalent)
+                self.process.send_signal(signal.SIGINT)
                self.process.wait(timeout=10)
            
        except subprocess.TimeoutExpired:
+            self.on_status("Timeout - forcing stop")
            self.process.terminate()
+            try:
                self.process.wait(timeout=5)
+            except:
+                self.process.kill()
        except Exception as e:
            self.on_status(f"Stop error: {e}")
+            try:
                self.process.terminate()
+            except:
+                pass
        
        self.is_recording = False
        self.is_paused = False
@@ -210,7 +307,15 @@ class ScreenRecorder:
        duration = self.get_duration()
        self.on_status(f"Stopped: {duration:.1f}s")
        
-        return self.output_path if self.output_path and self.output_path.exists() else None
+        # Check if output file exists and has content
+        if self.output_path and self.output_path.exists():
+            if self.output_path.stat().st_size > 1000:  # At least 1KB
+                return self.output_path
+            else:
+                self.on_status("Recording too short or failed")
+                return None
+        
+        return None
    
    def get_duration(self) -> float:
        """Get actual recording duration (excluding pauses)."""
@@ -244,6 +349,7 @@ class ScreenRecorder:
                capture_output=True,
                text=True,
                creationflags=subprocess.CREATE_NO_WINDOW,
+                timeout=10,
            )
            
            devices = []
@@ -257,9 +363,12 @@ class ScreenRecorder:
                    start = line.find('"') + 1
                    end = line.rfind('"')
                    if start < end:
-                        devices.append(line[start:end])
+                        device = line[start:end]
+                        # Skip virtual/system devices that don't work well
+                        if "virtual" not in device.lower():
+                            devices.append(device)
            
-            return devices if devices else ["Microphone Array"]
+            return devices if devices else []
            
        except:
-            return ["Microphone Array"]
+            return []