From d998a9a2b11f6a7671fd3b95f6319c99b289a914 Mon Sep 17 00:00:00 2001 From: Mario Lavoie Date: Tue, 10 Feb 2026 18:14:01 +0000 Subject: [PATCH] fix: Windows recording + pause button + Load Video feature - recorder.py: Fixed Windows FFmpeg handling - Use CTRL_BREAK_EVENT instead of stdin 'q' to stop recording - Use NtSuspendProcess/NtResumeProcess for pause/resume - Add stderr reader thread to prevent blocking - Better error handling and status messages - Check if recording actually started before returning success - gui_capture.py: Added Load Video feature - New 'Load' button in header to import existing videos - Copies video to session folder and runs transcription - Supports mp4, mkv, avi, mov, webm formats Fixes: - Record button now properly enables Pause button - Recording stops cleanly on Windows - Can load pre-recorded videos (from OBS, etc.) --- src/cad_documenter/gui_capture.py | 76 ++++++++++++ src/cad_documenter/recorder.py | 193 +++++++++++++++++++++++------- 2 files changed, 227 insertions(+), 42 deletions(-) diff --git a/src/cad_documenter/gui_capture.py b/src/cad_documenter/gui_capture.py index 86c3d7c..892b8d7 100644 --- a/src/cad_documenter/gui_capture.py +++ b/src/cad_documenter/gui_capture.py @@ -190,6 +190,18 @@ class KBCaptureGUI: command=self._browse_folder, ).pack(side="right") + # Load video button + CTkButton( + header, + text="📂 Load", + width=60, + height=32, + font=("", 10), + fg_color=COLORS["bg_card"], + hover_color=COLORS["bg_elevated"], + command=self._load_video, + ).pack(side="right", padx=(0, 8)) + # Timer card timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10) timer_frame.pack(fill="x", pady=(0, 12)) @@ -498,6 +510,70 @@ class KBCaptureGUI: name_entry.bind("", lambda e: create()) + def _load_video(self): + """Load an existing video file for processing.""" + if not self.app: + messagebox.showwarning("No Folder", "Select a projects folder first") + return + + if self.app.state != AppState.IDLE: + messagebox.showwarning("Busy", "Stop current recording first") + return + + project = self.project_menu.get() + if project.startswith("("): + messagebox.showwarning("No Project", "Select a project first") + return + + # Ask for video file + video_path = filedialog.askopenfilename( + title="Select Video to Process", + filetypes=[ + ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"), + ("All files", "*.*"), + ], + ) + + if not video_path: + return + + video_path = Path(video_path) + if not video_path.exists(): + messagebox.showerror("Error", "File not found") + return + + name = self.name_entry.get().strip() or video_path.stem + session_type = SessionType.DESIGN if self.type_var.get() == "design" else SessionType.ANALYSIS + + # Create session folder and copy/link video + import shutil + session = self.app.session_manager.start_session(name, project, session_type) + session_dir = self.app.session_manager.get_session_dir() + + # Copy video to session folder + dest_video = session_dir / "recording.mp4" + self.status_label.configure(text="Copying video...", text_color=COLORS["orange"]) + self.window.update() + + try: + shutil.copy2(video_path, dest_video) + except Exception as e: + messagebox.showerror("Error", f"Failed to copy video: {e}") + self.app.session_manager.cancel_session() + return + + # Now transcribe + self.status_label.configure(text="Transcribing...", text_color=COLORS["orange"]) + self.record_btn.configure(state="disabled") + self.window.update() + + # Run transcription in background + def do_transcribe(): + self.app._transcribe(dest_video) + + self.app.state = AppState.TRANSCRIBING + threading.Thread(target=do_transcribe, daemon=True).start() + def _toggle_recording(self): """Start or stop recording.""" if not self.app: diff --git a/src/cad_documenter/recorder.py b/src/cad_documenter/recorder.py index d0e02fc..2e24f1b 100644 --- a/src/cad_documenter/recorder.py +++ b/src/cad_documenter/recorder.py @@ -9,6 +9,8 @@ import subprocess import threading import time import sys +import os +import signal from pathlib import Path from dataclasses import dataclass from typing import Optional, Callable @@ -50,25 +52,33 @@ class ScreenRecorder: self.total_paused: float = 0.0 self.output_path: Optional[Path] = None self.on_status = on_status or (lambda x: None) + self._stderr_thread: Optional[threading.Thread] = None + self._last_error: str = "" def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]: """Build FFmpeg command.""" cmd = ["ffmpeg", "-y"] if sys.platform == "win32": - # Windows: gdigrab for screen + # Windows: gdigrab for screen capture cmd.extend([ "-f", "gdigrab", "-framerate", str(config.framerate), + "-draw_mouse", "1", "-i", "desktop", ]) - # Audio: dshow - audio_device = config.audio_device or "Microphone Array" - cmd.extend([ - "-f", "dshow", - "-i", f"audio={audio_device}", - ]) + # Audio: dshow - try to find a working device + audio_device = config.audio_device + if not audio_device: + devices = self.list_audio_devices() + audio_device = devices[0] if devices else None + + if audio_device: + cmd.extend([ + "-f", "dshow", + "-i", f"audio={audio_device}", + ]) else: # Linux: x11grab + pulse cmd.extend([ @@ -84,13 +94,32 @@ class ScreenRecorder: "-c:v", config.video_codec, "-preset", config.preset, "-crf", str(config.crf), - "-c:a", config.audio_codec, - "-b:a", "128k", - str(config.output_path), + "-pix_fmt", "yuv420p", # Ensure compatibility ]) + # Only add audio codec if we have audio input + if sys.platform != "win32" or config.audio_device or self.list_audio_devices(): + cmd.extend([ + "-c:a", config.audio_codec, + "-b:a", "128k", + ]) + + cmd.append(str(config.output_path)) + return cmd + def _read_stderr(self): + """Read stderr in background to prevent blocking.""" + if not self.process or not self.process.stderr: + return + try: + for line in self.process.stderr: + if isinstance(line, bytes): + line = line.decode('utf-8', errors='ignore') + self._last_error = line.strip() + except: + pass + def start(self, config: RecordingConfig) -> bool: """Start recording.""" if self.is_recording: @@ -101,15 +130,36 @@ class ScreenRecorder: self.output_path.parent.mkdir(parents=True, exist_ok=True) cmd = self._get_ffmpeg_cmd(config) + self.on_status(f"Starting: {' '.join(cmd[:6])}...") try: - self.process = subprocess.Popen( - cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0, - ) + # On Windows, use different process creation flags + if sys.platform == "win32": + # CREATE_NEW_PROCESS_GROUP allows sending CTRL_BREAK_EVENT + self.process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW, + ) + else: + self.process = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + # Start stderr reader thread + self._stderr_thread = threading.Thread(target=self._read_stderr, daemon=True) + self._stderr_thread.start() + + # Wait a moment to see if FFmpeg crashes immediately + time.sleep(0.5) + if self.process.poll() is not None: + self.on_status(f"FFmpeg failed: {self._last_error}") + return False self.is_recording = True self.is_paused = False @@ -120,10 +170,10 @@ class ScreenRecorder: return True except FileNotFoundError: - self.on_status("FFmpeg not found") + self.on_status("FFmpeg not found - install from ffmpeg.org") return False except Exception as e: - self.on_status(f"Failed: {e}") + self.on_status(f"Failed to start: {e}") return False def pause(self) -> bool: @@ -140,11 +190,20 @@ class ScreenRecorder: import ctypes kernel32 = ctypes.windll.kernel32 handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid) - kernel32.DebugActiveProcess(self.process.pid) - self.on_status("Paused") - except: - self.on_status("Paused (soft)") + if handle: + # Use NtSuspendProcess for cleaner suspension + ntdll = ctypes.windll.ntdll + ntdll.NtSuspendProcess(handle) + kernel32.CloseHandle(handle) + self.on_status("Paused") + else: + self.on_status("Paused (soft)") + except Exception as e: + self.on_status(f"Paused (soft): {e}") else: + # On Linux, send SIGSTOP + if self.process: + self.process.send_signal(signal.SIGSTOP) self.on_status("Paused") return True @@ -160,14 +219,22 @@ class ScreenRecorder: self.is_paused = False self.pause_start = None - # Resume FFmpeg process on Windows + # Resume FFmpeg process if sys.platform == "win32" and self.process: try: import ctypes kernel32 = ctypes.windll.kernel32 - kernel32.DebugActiveProcessStop(self.process.pid) + handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid) + if handle: + ntdll = ctypes.windll.ntdll + ntdll.NtResumeProcess(handle) + kernel32.CloseHandle(handle) except: pass + else: + # On Linux, send SIGCONT + if self.process: + self.process.send_signal(signal.SIGCONT) self.on_status("Recording resumed") return True @@ -177,32 +244,62 @@ class ScreenRecorder: if not self.is_recording or not self.process: return None - # If paused, add final pause duration - if self.is_paused and self.pause_start: - self.total_paused += time.time() - self.pause_start - # Resume first so we can stop properly + # If paused, resume first so we can stop properly + if self.is_paused: + if self.pause_start: + self.total_paused += time.time() - self.pause_start + if sys.platform == "win32": try: import ctypes kernel32 = ctypes.windll.kernel32 - kernel32.DebugActiveProcessStop(self.process.pid) + handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid) + if handle: + ntdll = ctypes.windll.ntdll + ntdll.NtResumeProcess(handle) + kernel32.CloseHandle(handle) except: pass + else: + self.process.send_signal(signal.SIGCONT) + + time.sleep(0.2) # Give it a moment to resume try: - # Send 'q' to FFmpeg - if self.process.stdin: - self.process.stdin.write(b"q") - self.process.stdin.flush() - - self.process.wait(timeout=10) + if sys.platform == "win32": + # On Windows, send CTRL_BREAK_EVENT to gracefully stop FFmpeg + # This works because we used CREATE_NEW_PROCESS_GROUP + try: + os.kill(self.process.pid, signal.CTRL_BREAK_EVENT) + self.process.wait(timeout=5) + except (subprocess.TimeoutExpired, OSError): + # Fallback: try stdin 'q' + try: + if self.process.stdin: + self.process.stdin.write(b"q\n") + self.process.stdin.flush() + self.process.wait(timeout=5) + except: + self.process.terminate() + self.process.wait(timeout=3) + else: + # On Linux, send SIGINT (Ctrl+C equivalent) + self.process.send_signal(signal.SIGINT) + self.process.wait(timeout=10) except subprocess.TimeoutExpired: + self.on_status("Timeout - forcing stop") self.process.terminate() - self.process.wait(timeout=5) + try: + self.process.wait(timeout=5) + except: + self.process.kill() except Exception as e: self.on_status(f"Stop error: {e}") - self.process.terminate() + try: + self.process.terminate() + except: + pass self.is_recording = False self.is_paused = False @@ -210,7 +307,15 @@ class ScreenRecorder: duration = self.get_duration() self.on_status(f"Stopped: {duration:.1f}s") - return self.output_path if self.output_path and self.output_path.exists() else None + # Check if output file exists and has content + if self.output_path and self.output_path.exists(): + if self.output_path.stat().st_size > 1000: # At least 1KB + return self.output_path + else: + self.on_status("Recording too short or failed") + return None + + return None def get_duration(self) -> float: """Get actual recording duration (excluding pauses).""" @@ -244,6 +349,7 @@ class ScreenRecorder: capture_output=True, text=True, creationflags=subprocess.CREATE_NO_WINDOW, + timeout=10, ) devices = [] @@ -257,9 +363,12 @@ class ScreenRecorder: start = line.find('"') + 1 end = line.rfind('"') if start < end: - devices.append(line[start:end]) + device = line[start:end] + # Skip virtual/system devices that don't work well + if "virtual" not in device.lower(): + devices.append(device) - return devices if devices else ["Microphone Array"] + return devices if devices else [] except: - return ["Microphone Array"] + return []