fix: Windows recording + pause button + Load Video feature

- recorder.py: Fixed Windows FFmpeg handling
  - Use CTRL_BREAK_EVENT instead of stdin 'q' to stop recording
  - Use NtSuspendProcess/NtResumeProcess for pause/resume
  - Add stderr reader thread to prevent blocking
  - Better error handling and status messages
  - Check if recording actually started before returning success

- gui_capture.py: Added Load Video feature
  - New 'Load' button in header to import existing videos
  - Copies video to session folder and runs transcription
  - Supports mp4, mkv, avi, mov, webm formats

Fixes:
- Record button now properly enables Pause button
- Recording stops cleanly on Windows
- Can load pre-recorded videos (from OBS, etc.)
This commit is contained in:
Mario Lavoie
2026-02-10 18:14:01 +00:00
parent 9b24478f04
commit d998a9a2b1
2 changed files with 227 additions and 42 deletions

View File

@@ -190,6 +190,18 @@ class KBCaptureGUI:
command=self._browse_folder, command=self._browse_folder,
).pack(side="right") ).pack(side="right")
# Load video button
CTkButton(
header,
text="📂 Load",
width=60,
height=32,
font=("", 10),
fg_color=COLORS["bg_card"],
hover_color=COLORS["bg_elevated"],
command=self._load_video,
).pack(side="right", padx=(0, 8))
# Timer card # Timer card
timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10) timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10)
timer_frame.pack(fill="x", pady=(0, 12)) timer_frame.pack(fill="x", pady=(0, 12))
@@ -498,6 +510,70 @@ class KBCaptureGUI:
name_entry.bind("<Return>", lambda e: create()) name_entry.bind("<Return>", lambda e: create())
def _load_video(self):
"""Import an existing video file into a new session and transcribe it.

Shows a warning and returns early when no app is attached, when the app
state is not ``AppState.IDLE``, or when the selected project name starts
with "(" (presumably a placeholder menu entry -- confirm). Returns silently
when the file dialog is cancelled. Otherwise a session is started, the
chosen file is copied into the session directory as ``recording.mp4``, and
``self.app._transcribe`` is run on a daemon thread.

NOTE(review): this listing has lost its indentation, so the nesting of the
final statements cannot be confirmed from here.
"""
if not self.app:
messagebox.showwarning("No Folder", "Select a projects folder first")
return
if self.app.state != AppState.IDLE:
messagebox.showwarning("Busy", "Stop current recording first")
return
project = self.project_menu.get()
if project.startswith("("):
messagebox.showwarning("No Project", "Select a project first")
return
# Ask for video file
video_path = filedialog.askopenfilename(
title="Select Video to Process",
filetypes=[
("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
("All files", "*.*"),
],
)
# An empty result means the dialog was dismissed without a selection.
if not video_path:
return
video_path = Path(video_path)
if not video_path.exists():
messagebox.showerror("Error", "File not found")
return
# Fall back to the file name without its extension when no name was typed.
name = self.name_entry.get().strip() or video_path.stem
session_type = SessionType.DESIGN if self.type_var.get() == "design" else SessionType.ANALYSIS
# Create session folder and copy/link video
import shutil
session = self.app.session_manager.start_session(name, project, session_type)
session_dir = self.app.session_manager.get_session_dir()
# Copy video to session folder
# NOTE(review): the destination is always named recording.mp4, whatever the
# extension of the selected file (mkv, avi, mov, webm are also offered above).
dest_video = session_dir / "recording.mp4"
self.status_label.configure(text="Copying video...", text_color=COLORS["orange"])
# Presumably forces a redraw so the status text is visible before the
# synchronous copy below runs on this thread.
self.window.update()
try:
shutil.copy2(video_path, dest_video)
except Exception as e:
messagebox.showerror("Error", f"Failed to copy video: {e}")
# Presumably discards the session started above -- confirm in session_manager.
self.app.session_manager.cancel_session()
return
# Now transcribe
self.status_label.configure(text="Transcribing...", text_color=COLORS["orange"])
self.record_btn.configure(state="disabled")
self.window.update()
# Run transcription in background
def do_transcribe():
self.app._transcribe(dest_video)
# NOTE(review): presumably this assignment belongs to _load_video (set before
# the thread starts) rather than to do_transcribe -- verify against the file.
self.app.state = AppState.TRANSCRIBING
threading.Thread(target=do_transcribe, daemon=True).start()
def _toggle_recording(self): def _toggle_recording(self):
"""Start or stop recording.""" """Start or stop recording."""
if not self.app: if not self.app:

View File

@@ -9,6 +9,8 @@ import subprocess
import threading import threading
import time import time
import sys import sys
import os
import signal
from pathlib import Path from pathlib import Path
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Callable from typing import Optional, Callable
@@ -50,21 +52,29 @@ class ScreenRecorder:
self.total_paused: float = 0.0 self.total_paused: float = 0.0
self.output_path: Optional[Path] = None self.output_path: Optional[Path] = None
self.on_status = on_status or (lambda x: None) self.on_status = on_status or (lambda x: None)
self._stderr_thread: Optional[threading.Thread] = None
self._last_error: str = ""
def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]: def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]:
"""Build FFmpeg command.""" """Build FFmpeg command."""
cmd = ["ffmpeg", "-y"] cmd = ["ffmpeg", "-y"]
if sys.platform == "win32": if sys.platform == "win32":
# Windows: gdigrab for screen # Windows: gdigrab for screen capture
cmd.extend([ cmd.extend([
"-f", "gdigrab", "-f", "gdigrab",
"-framerate", str(config.framerate), "-framerate", str(config.framerate),
"-draw_mouse", "1",
"-i", "desktop", "-i", "desktop",
]) ])
# Audio: dshow # Audio: dshow - try to find a working device
audio_device = config.audio_device or "Microphone Array" audio_device = config.audio_device
if not audio_device:
devices = self.list_audio_devices()
audio_device = devices[0] if devices else None
if audio_device:
cmd.extend([ cmd.extend([
"-f", "dshow", "-f", "dshow",
"-i", f"audio={audio_device}", "-i", f"audio={audio_device}",
@@ -84,13 +94,32 @@ class ScreenRecorder:
"-c:v", config.video_codec, "-c:v", config.video_codec,
"-preset", config.preset, "-preset", config.preset,
"-crf", str(config.crf), "-crf", str(config.crf),
"-c:a", config.audio_codec, "-pix_fmt", "yuv420p", # Ensure compatibility
"-b:a", "128k",
str(config.output_path),
]) ])
# Only add audio codec if we have audio input
if sys.platform != "win32" or config.audio_device or self.list_audio_devices():
cmd.extend([
"-c:a", config.audio_codec,
"-b:a", "128k",
])
cmd.append(str(config.output_path))
return cmd return cmd
def _read_stderr(self):
    """Drain the recorder process's stderr so a full pipe cannot block it.

    Iterates over ``self.process.stderr`` until the stream ends and keeps
    the most recent non-empty line in ``self._last_error``. Blank lines are
    skipped so that a trailing empty line cannot erase the last real
    message. Returns immediately when there is no process or no stderr
    pipe.
    """
    if not self.process or not self.process.stderr:
        return

    try:
        for line in self.process.stderr:
            # The pipe yields bytes unless it was opened in text mode.
            if isinstance(line, bytes):
                line = line.decode("utf-8", errors="ignore")
            text = line.strip()
            if text:
                self._last_error = text
    except (OSError, ValueError):
        # Reading can fail if the pipe is closed while this loop is still
        # running (ValueError: I/O operation on closed file). That only
        # means there is nothing more to read, so stop quietly.
        pass
def start(self, config: RecordingConfig) -> bool: def start(self, config: RecordingConfig) -> bool:
"""Start recording.""" """Start recording."""
if self.is_recording: if self.is_recording:
@@ -101,15 +130,36 @@ class ScreenRecorder:
self.output_path.parent.mkdir(parents=True, exist_ok=True) self.output_path.parent.mkdir(parents=True, exist_ok=True)
cmd = self._get_ffmpeg_cmd(config) cmd = self._get_ffmpeg_cmd(config)
self.on_status(f"Starting: {' '.join(cmd[:6])}...")
try: try:
# On Windows, use different process creation flags
if sys.platform == "win32":
# CREATE_NEW_PROCESS_GROUP allows sending CTRL_BREAK_EVENT
self.process = subprocess.Popen( self.process = subprocess.Popen(
cmd, cmd,
stdin=subprocess.PIPE, stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0, creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW,
) )
else:
self.process = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# Start stderr reader thread
self._stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
self._stderr_thread.start()
# Wait a moment to see if FFmpeg crashes immediately
time.sleep(0.5)
if self.process.poll() is not None:
self.on_status(f"FFmpeg failed: {self._last_error}")
return False
self.is_recording = True self.is_recording = True
self.is_paused = False self.is_paused = False
@@ -120,10 +170,10 @@ class ScreenRecorder:
return True return True
except FileNotFoundError: except FileNotFoundError:
self.on_status("FFmpeg not found") self.on_status("FFmpeg not found - install from ffmpeg.org")
return False return False
except Exception as e: except Exception as e:
self.on_status(f"Failed: {e}") self.on_status(f"Failed to start: {e}")
return False return False
def pause(self) -> bool: def pause(self) -> bool:
@@ -140,11 +190,20 @@ class ScreenRecorder:
import ctypes import ctypes
kernel32 = ctypes.windll.kernel32 kernel32 = ctypes.windll.kernel32
handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid) handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
kernel32.DebugActiveProcess(self.process.pid) if handle:
# Use NtSuspendProcess for cleaner suspension
ntdll = ctypes.windll.ntdll
ntdll.NtSuspendProcess(handle)
kernel32.CloseHandle(handle)
self.on_status("Paused") self.on_status("Paused")
except:
self.on_status("Paused (soft)")
else: else:
self.on_status("Paused (soft)")
except Exception as e:
self.on_status(f"Paused (soft): {e}")
else:
# On Linux, send SIGSTOP
if self.process:
self.process.send_signal(signal.SIGSTOP)
self.on_status("Paused") self.on_status("Paused")
return True return True
@@ -160,14 +219,22 @@ class ScreenRecorder:
self.is_paused = False self.is_paused = False
self.pause_start = None self.pause_start = None
# Resume FFmpeg process on Windows # Resume FFmpeg process
if sys.platform == "win32" and self.process: if sys.platform == "win32" and self.process:
try: try:
import ctypes import ctypes
kernel32 = ctypes.windll.kernel32 kernel32 = ctypes.windll.kernel32
kernel32.DebugActiveProcessStop(self.process.pid) handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
if handle:
ntdll = ctypes.windll.ntdll
ntdll.NtResumeProcess(handle)
kernel32.CloseHandle(handle)
except: except:
pass pass
else:
# On Linux, send SIGCONT
if self.process:
self.process.send_signal(signal.SIGCONT)
self.on_status("Recording resumed") self.on_status("Recording resumed")
return True return True
@@ -177,32 +244,62 @@ class ScreenRecorder:
if not self.is_recording or not self.process: if not self.is_recording or not self.process:
return None return None
# If paused, add final pause duration # If paused, resume first so we can stop properly
if self.is_paused and self.pause_start: if self.is_paused:
if self.pause_start:
self.total_paused += time.time() - self.pause_start self.total_paused += time.time() - self.pause_start
# Resume first so we can stop properly
if sys.platform == "win32": if sys.platform == "win32":
try: try:
import ctypes import ctypes
kernel32 = ctypes.windll.kernel32 kernel32 = ctypes.windll.kernel32
kernel32.DebugActiveProcessStop(self.process.pid) handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
if handle:
ntdll = ctypes.windll.ntdll
ntdll.NtResumeProcess(handle)
kernel32.CloseHandle(handle)
except: except:
pass pass
else:
self.process.send_signal(signal.SIGCONT)
time.sleep(0.2) # Give it a moment to resume
try: try:
# Send 'q' to FFmpeg if sys.platform == "win32":
# On Windows, send CTRL_BREAK_EVENT to gracefully stop FFmpeg
# This works because we used CREATE_NEW_PROCESS_GROUP
try:
os.kill(self.process.pid, signal.CTRL_BREAK_EVENT)
self.process.wait(timeout=5)
except (subprocess.TimeoutExpired, OSError):
# Fallback: try stdin 'q'
try:
if self.process.stdin: if self.process.stdin:
self.process.stdin.write(b"q") self.process.stdin.write(b"q\n")
self.process.stdin.flush() self.process.stdin.flush()
self.process.wait(timeout=5)
except:
self.process.terminate()
self.process.wait(timeout=3)
else:
# On Linux, send SIGINT (Ctrl+C equivalent)
self.process.send_signal(signal.SIGINT)
self.process.wait(timeout=10) self.process.wait(timeout=10)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
self.on_status("Timeout - forcing stop")
self.process.terminate() self.process.terminate()
try:
self.process.wait(timeout=5) self.process.wait(timeout=5)
except:
self.process.kill()
except Exception as e: except Exception as e:
self.on_status(f"Stop error: {e}") self.on_status(f"Stop error: {e}")
try:
self.process.terminate() self.process.terminate()
except:
pass
self.is_recording = False self.is_recording = False
self.is_paused = False self.is_paused = False
@@ -210,7 +307,15 @@ class ScreenRecorder:
duration = self.get_duration() duration = self.get_duration()
self.on_status(f"Stopped: {duration:.1f}s") self.on_status(f"Stopped: {duration:.1f}s")
return self.output_path if self.output_path and self.output_path.exists() else None # Check if output file exists and has content
if self.output_path and self.output_path.exists():
if self.output_path.stat().st_size > 1000: # At least 1KB
return self.output_path
else:
self.on_status("Recording too short or failed")
return None
return None
def get_duration(self) -> float: def get_duration(self) -> float:
"""Get actual recording duration (excluding pauses).""" """Get actual recording duration (excluding pauses)."""
@@ -244,6 +349,7 @@ class ScreenRecorder:
capture_output=True, capture_output=True,
text=True, text=True,
creationflags=subprocess.CREATE_NO_WINDOW, creationflags=subprocess.CREATE_NO_WINDOW,
timeout=10,
) )
devices = [] devices = []
@@ -257,9 +363,12 @@ class ScreenRecorder:
start = line.find('"') + 1 start = line.find('"') + 1
end = line.rfind('"') end = line.rfind('"')
if start < end: if start < end:
devices.append(line[start:end]) device = line[start:end]
# Skip virtual/system devices that don't work well
if "virtual" not in device.lower():
devices.append(device)
return devices if devices else ["Microphone Array"] return devices if devices else []
except: except:
return ["Microphone Array"] return []