From d998a9a2b11f6a7671fd3b95f6319c99b289a914 Mon Sep 17 00:00:00 2001
From: Mario Lavoie <mario@atomaste.ca>
Date: Tue, 10 Feb 2026 18:14:01 +0000
Subject: [PATCH] fix: Windows recording + pause button + Load Video feature

- recorder.py: Fixed Windows FFmpeg handling
  - Use CTRL_BREAK_EVENT to stop recording on Windows (stdin 'q', then terminate, remain as fallbacks); use SIGINT on Linux
  - Use NtSuspendProcess/NtResumeProcess for pause/resume
  - Add stderr reader thread to prevent blocking
  - Better error handling and status messages
  - Check if recording actually started before returning success

- gui_capture.py: Added Load Video feature
  - New 'Load' button in header to import existing videos
  - Copies video to session folder and runs transcription
  - Supports mp4, mkv, avi, mov, webm formats (the copy is always stored as recording.mp4 in the session folder)

Fixes:
- Record button now properly enables Pause button
- Recording stops cleanly on Windows
- Can load pre-recorded videos (from OBS, etc.)
---
 src/cad_documenter/gui_capture.py |  76 ++++++++++++
 src/cad_documenter/recorder.py    | 193 +++++++++++++++++++++++-------
 2 files changed, 227 insertions(+), 42 deletions(-)
diff --git a/src/cad_documenter/gui_capture.py b/src/cad_documenter/gui_capture.py
index 86c3d7c..892b8d7 100644
--- a/src/cad_documenter/gui_capture.py
+++ b/src/cad_documenter/gui_capture.py
@@ -190,6 +190,18 @@ class KBCaptureGUI:
             command=self._browse_folder,
         ).pack(side="right")
         
+        # Load video button
+        CTkButton(
+            header,
+            text="📂 Load",
+            width=60,
+            height=32,
+            font=("", 10),
+            fg_color=COLORS["bg_card"],
+            hover_color=COLORS["bg_elevated"],
+            command=self._load_video,
+        ).pack(side="right", padx=(0, 8))
+        
         # Timer card
         timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10)
         timer_frame.pack(fill="x", pady=(0, 12))
@@ -498,6 +510,70 @@ class KBCaptureGUI:
         
         name_entry.bind("<Return>", lambda e: create())
     
+    def _load_video(self):
+        """Import an existing video into a new session and transcribe it in the background."""
+        if not self.app:
+            messagebox.showwarning("No Folder", "Select a projects folder first")
+            return
+        
+        if self.app.state != AppState.IDLE:
+            messagebox.showwarning("Busy", "Stop current recording first")
+            return
+        
+        project = self.project_menu.get()
+        if project.startswith("("):
+            messagebox.showwarning("No Project", "Select a project first")
+            return
+        
+        # Ask for video file
+        video_path = filedialog.askopenfilename(
+            title="Select Video to Process",
+            filetypes=[
+                ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
+                ("All files", "*.*"),
+            ],
+        )
+        
+        if not video_path:
+            return
+        
+        video_path = Path(video_path)
+        if not video_path.exists():
+            messagebox.showerror("Error", "File not found")
+            return
+        
+        name = self.name_entry.get().strip() or video_path.stem
+        session_type = SessionType.DESIGN if self.type_var.get() == "design" else SessionType.ANALYSIS
+        
+        # Create the session folder that will receive the video
+        import shutil
+        self.app.session_manager.start_session(name, project, session_type)
+        session_dir = self.app.session_manager.get_session_dir()
+        
+        # Copy into the session under the fixed name "recording.mp4" (kept even for non-mp4 sources)
+        dest_video = session_dir / "recording.mp4"
+        self.status_label.configure(text="Copying video...", text_color=COLORS["orange"])
+        self.window.update_idletasks()  # repaint only; update() would also dispatch queued clicks
+        
+        try:
+            shutil.copy2(video_path, dest_video)
+        except Exception as e:
+            self.status_label.configure(text="Copy failed", text_color=COLORS["orange"])
+            messagebox.showerror("Error", f"Failed to copy video: {e}")
+            self.app.session_manager.cancel_session()
+            return
+        
+        # Transcribe on a worker thread so the UI stays responsive
+        self.status_label.configure(text="Transcribing...", text_color=COLORS["orange"])
+        self.record_btn.configure(state="disabled")
+        self.window.update_idletasks()
+        
+        # Mark the app busy before the worker starts so it never observes IDLE
+        self.app.state = AppState.TRANSCRIBING
+        threading.Thread(
+            target=self.app._transcribe, args=(dest_video,), daemon=True
+        ).start()
+    
     def _toggle_recording(self):
         """Start or stop recording."""
         if not self.app:
diff --git a/src/cad_documenter/recorder.py b/src/cad_documenter/recorder.py
index d0e02fc..2e24f1b 100644
--- a/src/cad_documenter/recorder.py
+++ b/src/cad_documenter/recorder.py
@@ -9,6 +9,8 @@ import subprocess
 import threading
 import time
 import sys
+import os
+import signal
 from pathlib import Path
 from dataclasses import dataclass
 from typing import Optional, Callable
@@ -50,25 +52,33 @@ class ScreenRecorder:
         self.total_paused: float = 0.0
         self.output_path: Optional[Path] = None
         self.on_status = on_status or (lambda x: None)
+        self._stderr_thread: Optional[threading.Thread] = None
+        self._last_error: str = ""
     
     def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]:
         """Build FFmpeg command."""
         cmd = ["ffmpeg", "-y"]
         
         if sys.platform == "win32":
-            # Windows: gdigrab for screen
+            # Windows: gdigrab for screen capture
             cmd.extend([
                 "-f", "gdigrab",
                 "-framerate", str(config.framerate),
+                "-draw_mouse", "1",
                 "-i", "desktop",
             ])
             
-            # Audio: dshow
-            audio_device = config.audio_device or "Microphone Array"
-            cmd.extend([
-                "-f", "dshow",
-                "-i", f"audio={audio_device}",
-            ])
+            # Audio: dshow - try to find a working device
+            audio_device = config.audio_device
+            if not audio_device:
+                devices = self.list_audio_devices()
+                audio_device = devices[0] if devices else None
+            
+            if audio_device:
+                cmd.extend([
+                    "-f", "dshow",
+                    "-i", f"audio={audio_device}",
+                ])
         else:
             # Linux: x11grab + pulse
             cmd.extend([
@@ -84,13 +94,32 @@ class ScreenRecorder:
             "-c:v", config.video_codec,
             "-preset", config.preset,
             "-crf", str(config.crf),
-            "-c:a", config.audio_codec,
-            "-b:a", "128k",
-            str(config.output_path),
+            "-pix_fmt", "yuv420p",  # Ensure compatibility
         ])
         
+        # Only add audio codec if we have audio input
+        if sys.platform != "win32" or config.audio_device or self.list_audio_devices():
+            cmd.extend([
+                "-c:a", config.audio_codec,
+                "-b:a", "128k",
+            ])
+        
+        cmd.append(str(config.output_path))
+        
         return cmd
     
+    def _read_stderr(self):
+        """Drain FFmpeg's stderr so the pipe cannot fill up; remember the last non-empty line."""
+        if not self.process or not self.process.stderr:
+            return
+        try:
+            for line in self.process.stderr:
+                if isinstance(line, bytes):
+                    line = line.decode('utf-8', errors='ignore')
+                self._last_error = line.strip() or self._last_error
+        except (OSError, ValueError):
+            pass  # pipe was closed while FFmpeg was being stopped; nothing left to read
+    
     def start(self, config: RecordingConfig) -> bool:
         """Start recording."""
         if self.is_recording:
@@ -101,15 +130,36 @@ class ScreenRecorder:
         self.output_path.parent.mkdir(parents=True, exist_ok=True)
         
         cmd = self._get_ffmpeg_cmd(config)
+        self.on_status(f"Starting: {' '.join(cmd[:6])}...")
         
         try:
-            self.process = subprocess.Popen(
-                cmd,
-                stdin=subprocess.PIPE,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0,
-            )
+            # On Windows, use different process creation flags
+            if sys.platform == "win32":
+                # CREATE_NEW_PROCESS_GROUP allows sending CTRL_BREAK_EVENT
+                self.process = subprocess.Popen(
+                    cmd,
+                    stdin=subprocess.PIPE,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                    creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW,
+                )
+            else:
+                self.process = subprocess.Popen(
+                    cmd,
+                    stdin=subprocess.PIPE,
+                    stdout=subprocess.PIPE,
+                    stderr=subprocess.PIPE,
+                )
+            
+            # Start stderr reader thread
+            self._stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
+            self._stderr_thread.start()
+            
+            # Wait a moment to see if FFmpeg crashes immediately
+            time.sleep(0.5)
+            if self.process.poll() is not None:
+                self.on_status(f"FFmpeg failed: {self._last_error}")
+                return False
             
             self.is_recording = True
             self.is_paused = False
@@ -120,10 +170,10 @@ class ScreenRecorder:
             return True
             
         except FileNotFoundError:
-            self.on_status("FFmpeg not found")
+            self.on_status("FFmpeg not found - install from ffmpeg.org")
             return False
         except Exception as e:
-            self.on_status(f"Failed: {e}")
+            self.on_status(f"Failed to start: {e}")
             return False
     
     def pause(self) -> bool:
@@ -140,11 +190,20 @@ class ScreenRecorder:
                 import ctypes
                 kernel32 = ctypes.windll.kernel32
                 handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
-                kernel32.DebugActiveProcess(self.process.pid)
-                self.on_status("Paused")
-            except:
-                self.on_status("Paused (soft)")
+                if handle:
+                    # Use NtSuspendProcess for cleaner suspension
+                    ntdll = ctypes.windll.ntdll
+                    ntdll.NtSuspendProcess(handle)
+                    kernel32.CloseHandle(handle)
+                    self.on_status("Paused")
+                else:
+                    self.on_status("Paused (soft)")
+            except Exception as e:
+                self.on_status(f"Paused (soft): {e}")
         else:
+            # On Linux, send SIGSTOP
+            if self.process:
+                self.process.send_signal(signal.SIGSTOP)
             self.on_status("Paused")
         
         return True
@@ -160,14 +219,22 @@ class ScreenRecorder:
         self.is_paused = False
         self.pause_start = None
         
-        # Resume FFmpeg process on Windows
+        # Resume FFmpeg process
         if sys.platform == "win32" and self.process:
             try:
                 import ctypes
                 kernel32 = ctypes.windll.kernel32
-                kernel32.DebugActiveProcessStop(self.process.pid)
+                handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
+                if handle:
+                    ntdll = ctypes.windll.ntdll
+                    ntdll.NtResumeProcess(handle)
+                    kernel32.CloseHandle(handle)
             except:
                 pass
+        else:
+            # On Linux, send SIGCONT
+            if self.process:
+                self.process.send_signal(signal.SIGCONT)
         
         self.on_status("Recording resumed")
         return True
@@ -177,32 +244,62 @@ class ScreenRecorder:
         if not self.is_recording or not self.process:
             return None
         
-        # If paused, add final pause duration
-        if self.is_paused and self.pause_start:
-            self.total_paused += time.time() - self.pause_start
-            # Resume first so we can stop properly
+        # If paused, resume first so we can stop properly
+        if self.is_paused:
+            if self.pause_start:
+                self.total_paused += time.time() - self.pause_start
+            
             if sys.platform == "win32":
                 try:
                     import ctypes
                     kernel32 = ctypes.windll.kernel32
-                    kernel32.DebugActiveProcessStop(self.process.pid)
+                    handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
+                    if handle:
+                        ntdll = ctypes.windll.ntdll
+                        ntdll.NtResumeProcess(handle)
+                        kernel32.CloseHandle(handle)
                 except:
                     pass
+            else:
+                self.process.send_signal(signal.SIGCONT)
+            
+            time.sleep(0.2)  # Give it a moment to resume
         
         try:
-            # Send 'q' to FFmpeg
-            if self.process.stdin:
-                self.process.stdin.write(b"q")
-                self.process.stdin.flush()
-            
-            self.process.wait(timeout=10)
+            if sys.platform == "win32":
+                # On Windows, send CTRL_BREAK_EVENT to gracefully stop FFmpeg
+                # This works because we used CREATE_NEW_PROCESS_GROUP
+                try:
+                    os.kill(self.process.pid, signal.CTRL_BREAK_EVENT)
+                    self.process.wait(timeout=5)
+                except (subprocess.TimeoutExpired, OSError):
+                    # Fallback: try stdin 'q'
+                    try:
+                        if self.process.stdin:
+                            self.process.stdin.write(b"q\n")
+                            self.process.stdin.flush()
+                        self.process.wait(timeout=5)
+                    except:
+                        self.process.terminate()
+                        self.process.wait(timeout=3)
+            else:
+                # On Linux, send SIGINT (Ctrl+C equivalent)
+                self.process.send_signal(signal.SIGINT)
+                self.process.wait(timeout=10)
             
         except subprocess.TimeoutExpired:
+            self.on_status("Timeout - forcing stop")
             self.process.terminate()
-            self.process.wait(timeout=5)
+            try:
+                self.process.wait(timeout=5)
+            except:
+                self.process.kill()
         except Exception as e:
             self.on_status(f"Stop error: {e}")
-            self.process.terminate()
+            try:
+                self.process.terminate()
+            except:
+                pass
         
         self.is_recording = False
         self.is_paused = False
@@ -210,7 +307,15 @@ class ScreenRecorder:
         duration = self.get_duration()
         self.on_status(f"Stopped: {duration:.1f}s")
         
-        return self.output_path if self.output_path and self.output_path.exists() else None
+        # Check if output file exists and has content
+        if self.output_path and self.output_path.exists():
+            if self.output_path.stat().st_size > 1000:  # At least 1KB
+                return self.output_path
+            else:
+                self.on_status("Recording too short or failed")
+                return None
+        
+        return None
     
     def get_duration(self) -> float:
         """Get actual recording duration (excluding pauses)."""
@@ -244,6 +349,7 @@ class ScreenRecorder:
                 capture_output=True,
                 text=True,
                 creationflags=subprocess.CREATE_NO_WINDOW,
+                timeout=10,
             )
             
             devices = []
@@ -257,9 +363,12 @@ class ScreenRecorder:
                     start = line.find('"') + 1
                     end = line.rfind('"')
                     if start < end:
-                        devices.append(line[start:end])
+                        device = line[start:end]
+                        # Skip virtual/system devices that don't work well
+                        if "virtual" not in device.lower():
+                            devices.append(device)
             
-            return devices if devices else ["Microphone Array"]
+            return devices if devices else []
             
         except:
-            return ["Microphone Array"]
+            return []