fix: Windows recording + pause button + Load Video feature

- recorder.py: Fixed Windows FFmpeg handling
  - Use CTRL_BREAK_EVENT instead of stdin 'q' to stop recording
  - Use NtSuspendProcess/NtResumeProcess for pause/resume
  - Add stderr reader thread to prevent blocking
  - Better error handling and status messages
  - Check if recording actually started before returning success
- gui_capture.py: Added Load Video feature
  - New 'Load' button in header to import existing videos
  - Copies video to session folder and runs transcription
  - Supports mp4, mkv, avi, mov, webm formats

Fixes:
- Record button now properly enables Pause button
- Recording stops cleanly on Windows
- Can load pre-recorded videos (from OBS, etc.)
2026-02-10 18:14:01 +00:00
parent 9b24478f04
commit d998a9a2b1
2 changed files with 227 additions and 42 deletions
--- a/src/cad_documenter/gui_capture.py
+++ b/src/cad_documenter/gui_capture.py
@@ -190,6 +190,18 @@ class KBCaptureGUI:
            command=self._browse_folder,
        ).pack(side="right")
        # Load video button
        CTkButton(
            header,
            text="📂 Load",
            width=60,
            height=32,
            font=("", 10),
            fg_color=COLORS["bg_card"],
            hover_color=COLORS["bg_elevated"],
            command=self._load_video,
        ).pack(side="right", padx=(0, 8))
        # Timer card
        timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10)
        timer_frame.pack(fill="x", pady=(0, 12))
@@ -498,6 +510,70 @@ class KBCaptureGUI:
        name_entry.bind("<Return>", lambda e: create())
    def _load_video(self):
        """Import an already-recorded video into a new session and transcribe it.

        Flow: validate that a projects folder, an idle app state and a project
        are all present; ask the user for a file; start a session; copy the
        file into the session directory as ``recording.mp4``; then hand the
        copy to ``self.app._transcribe`` on a daemon thread.

        Every failure path reports through a message box and returns ``None``.
        A failed copy additionally cancels the session that was just started.
        """
        import shutil

        # --- Preconditions -------------------------------------------------
        if not self.app:
            messagebox.showwarning("No Folder", "Select a projects folder first")
            return

        if self.app.state != AppState.IDLE:
            messagebox.showwarning("Busy", "Stop current recording first")
            return

        project = self.project_menu.get()
        # Entries starting with "(" are treated as "no project selected".
        if project.startswith("("):
            messagebox.showwarning("No Project", "Select a project first")
            return

        # --- Pick the source file -----------------------------------------
        video_filetypes = [
            ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
            ("All files", "*.*"),
        ]
        chosen = filedialog.askopenfilename(
            title="Select Video to Process",
            filetypes=video_filetypes,
        )
        if not chosen:
            # Dialog was dismissed without a selection.
            return

        source = Path(chosen)
        if not source.exists():
            messagebox.showerror("Error", "File not found")
            return

        # --- Session parameters -------------------------------------------
        # Fall back to the file's stem when the name field is blank.
        session_name = self.name_entry.get().strip() or source.stem
        if self.type_var.get() == "design":
            session_kind = SessionType.DESIGN
        else:
            session_kind = SessionType.ANALYSIS

        # --- Create the session and copy the video into it -----------------
        sessions = self.app.session_manager
        sessions.start_session(session_name, project, session_kind)
        target = sessions.get_session_dir() / "recording.mp4"

        self.status_label.configure(text="Copying video...", text_color=COLORS["orange"])
        # Force a redraw so the status is visible during the blocking copy.
        self.window.update()

        try:
            shutil.copy2(source, target)
        except Exception as e:
            messagebox.showerror("Error", f"Failed to copy video: {e}")
            self.app.session_manager.cancel_session()
            return

        # --- Kick off transcription ----------------------------------------
        self.status_label.configure(text="Transcribing...", text_color=COLORS["orange"])
        self.record_btn.configure(state="disabled")
        self.window.update()

        # Mark the app busy before the worker starts.
        self.app.state = AppState.TRANSCRIBING
        worker = threading.Thread(
            target=lambda: self.app._transcribe(target),
            daemon=True,
        )
        worker.start()
    def _toggle_recording(self):
        """Start or stop recording."""
        if not self.app:
--- a/src/cad_documenter/recorder.py
+++ b/src/cad_documenter/recorder.py
@@ -9,6 +9,8 @@ import subprocess
 import threading
 import time
 import sys
 import os
 import signal
 from pathlib import Path
 from dataclasses import dataclass
 from typing import Optional, Callable
@@ -50,21 +52,29 @@ class ScreenRecorder:
        self.total_paused: float = 0.0
        self.output_path: Optional[Path] = None
        self.on_status = on_status or (lambda x: None)
        self._stderr_thread: Optional[threading.Thread] = None
        self._last_error: str = ""
    def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]:
        """Build FFmpeg command."""
        cmd = ["ffmpeg", "-y"]
        if sys.platform == "win32":
-            # Windows: gdigrab for screen
+            # Windows: gdigrab for screen capture
            cmd.extend([
                "-f", "gdigrab",
                "-framerate", str(config.framerate),
                "-draw_mouse", "1",
                "-i", "desktop",
            ])
-            # Audio: dshow
+            # Audio: dshow - try to find a working device
-            audio_device = config.audio_device or "Microphone Array"
+            audio_device = config.audio_device
            if not audio_device:
                devices = self.list_audio_devices()
                audio_device = devices[0] if devices else None
            if audio_device:
                cmd.extend([
                    "-f", "dshow",
                    "-i", f"audio={audio_device}",
@@ -84,13 +94,32 @@ class ScreenRecorder:
            "-c:v", config.video_codec,
            "-preset", config.preset,
            "-crf", str(config.crf),
-            "-c:a", config.audio_codec,
+            "-pix_fmt", "yuv420p",  # Ensure compatibility
            "-b:a", "128k",
            str(config.output_path),
        ])
        # Only add audio codec if we have audio input
        if sys.platform != "win32" or config.audio_device or self.list_audio_devices():
            cmd.extend([
                "-c:a", config.audio_codec,
                "-b:a", "128k",
            ])
        cmd.append(str(config.output_path))
        return cmd
    def _read_stderr(self):
        """Read stderr in background to prevent blocking."""
        if not self.process or not self.process.stderr:
            return
        try:
            for line in self.process.stderr:
                if isinstance(line, bytes):
                    line = line.decode('utf-8', errors='ignore')
                self._last_error = line.strip()
        except:
            pass
    def start(self, config: RecordingConfig) -> bool:
        """Start recording."""
        if self.is_recording:
@@ -101,15 +130,36 @@ class ScreenRecorder:
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        cmd = self._get_ffmpeg_cmd(config)
        self.on_status(f"Starting: {' '.join(cmd[:6])}...")
        try:
            # On Windows, use different process creation flags
            if sys.platform == "win32":
                # CREATE_NEW_PROCESS_GROUP allows sending CTRL_BREAK_EVENT
                self.process = subprocess.Popen(
                    cmd,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
-                creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0,
+                    creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW,
                )
            else:
                self.process = subprocess.Popen(
                    cmd,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
            # Start stderr reader thread
            self._stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
            self._stderr_thread.start()
            # Wait a moment to see if FFmpeg crashes immediately
            time.sleep(0.5)
            if self.process.poll() is not None:
                self.on_status(f"FFmpeg failed: {self._last_error}")
                return False
            self.is_recording = True
            self.is_paused = False
@@ -120,10 +170,10 @@ class ScreenRecorder:
            return True
        except FileNotFoundError:
-            self.on_status("FFmpeg not found")
+            self.on_status("FFmpeg not found - install from ffmpeg.org")
            return False
        except Exception as e:
-            self.on_status(f"Failed: {e}")
+            self.on_status(f"Failed to start: {e}")
            return False
    def pause(self) -> bool:
@@ -140,11 +190,20 @@ class ScreenRecorder:
                import ctypes
                kernel32 = ctypes.windll.kernel32
                handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
-                kernel32.DebugActiveProcess(self.process.pid)
+                if handle:
                    # Use NtSuspendProcess for cleaner suspension
                    ntdll = ctypes.windll.ntdll
                    ntdll.NtSuspendProcess(handle)
                    kernel32.CloseHandle(handle)
                    self.on_status("Paused")
            except:
                self.on_status("Paused (soft)")
                else:
                    self.on_status("Paused (soft)")
            except Exception as e:
                self.on_status(f"Paused (soft): {e}")
        else:
            # On Linux, send SIGSTOP
            if self.process:
                self.process.send_signal(signal.SIGSTOP)
            self.on_status("Paused")
        return True
@@ -160,14 +219,22 @@ class ScreenRecorder:
        self.is_paused = False
        self.pause_start = None
-        # Resume FFmpeg process on Windows
+        # Resume FFmpeg process
        if sys.platform == "win32" and self.process:
            try:
                import ctypes
                kernel32 = ctypes.windll.kernel32
-                kernel32.DebugActiveProcessStop(self.process.pid)
+                handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
                if handle:
                    ntdll = ctypes.windll.ntdll
                    ntdll.NtResumeProcess(handle)
                    kernel32.CloseHandle(handle)
            except:
                pass
        else:
            # On Linux, send SIGCONT
            if self.process:
                self.process.send_signal(signal.SIGCONT)
        self.on_status("Recording resumed")
        return True
@@ -177,32 +244,62 @@ class ScreenRecorder:
        if not self.is_recording or not self.process:
            return None
-        # If paused, add final pause duration
+        # If paused, resume first so we can stop properly
-        if self.is_paused and self.pause_start:
+        if self.is_paused:
            if self.pause_start:
                self.total_paused += time.time() - self.pause_start
-            # Resume first so we can stop properly
+            
            if sys.platform == "win32":
                try:
                    import ctypes
                    kernel32 = ctypes.windll.kernel32
-                    kernel32.DebugActiveProcessStop(self.process.pid)
+                    handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
                    if handle:
                        ntdll = ctypes.windll.ntdll
                        ntdll.NtResumeProcess(handle)
                        kernel32.CloseHandle(handle)
                except:
                    pass
            else:
                self.process.send_signal(signal.SIGCONT)
            time.sleep(0.2)  # Give it a moment to resume
        try:
-            # Send 'q' to FFmpeg
+            if sys.platform == "win32":
                # On Windows, send CTRL_BREAK_EVENT to gracefully stop FFmpeg
                # This works because we used CREATE_NEW_PROCESS_GROUP
                try:
                    os.kill(self.process.pid, signal.CTRL_BREAK_EVENT)
                    self.process.wait(timeout=5)
                except (subprocess.TimeoutExpired, OSError):
                    # Fallback: try stdin 'q'
                    try:
                        if self.process.stdin:
-                self.process.stdin.write(b"q")
+                            self.process.stdin.write(b"q\n")
                            self.process.stdin.flush()
-            
+                        self.process.wait(timeout=5)
                    except:
                        self.process.terminate()
                        self.process.wait(timeout=3)
            else:
                # On Linux, send SIGINT (Ctrl+C equivalent)
                self.process.send_signal(signal.SIGINT)
                self.process.wait(timeout=10)
        except subprocess.TimeoutExpired:
            self.on_status("Timeout - forcing stop")
            self.process.terminate()
            try:
                self.process.wait(timeout=5)
            except:
                self.process.kill()
        except Exception as e:
            self.on_status(f"Stop error: {e}")
            try:
                self.process.terminate()
            except:
                pass
        self.is_recording = False
        self.is_paused = False
@@ -210,7 +307,15 @@ class ScreenRecorder:
        duration = self.get_duration()
        self.on_status(f"Stopped: {duration:.1f}s")
-        return self.output_path if self.output_path and self.output_path.exists() else None
+        # Check if output file exists and has content
        if self.output_path and self.output_path.exists():
            if self.output_path.stat().st_size > 1000:  # At least 1KB
                return self.output_path
            else:
                self.on_status("Recording too short or failed")
                return None
        return None
    def get_duration(self) -> float:
        """Get actual recording duration (excluding pauses)."""
@@ -244,6 +349,7 @@ class ScreenRecorder:
                capture_output=True,
                text=True,
                creationflags=subprocess.CREATE_NO_WINDOW,
                timeout=10,
            )
            devices = []
@@ -257,9 +363,12 @@ class ScreenRecorder:
                    start = line.find('"') + 1
                    end = line.rfind('"')
                    if start < end:
-                        devices.append(line[start:end])
+                        device = line[start:end]
                        # Skip virtual/system devices that don't work well
                        if "virtual" not in device.lower():
                            devices.append(device)
-            return devices if devices else ["Microphone Array"]
+            return devices if devices else []
        except:
-            return ["Microphone Array"]
+            return []