fix: Windows recording + pause button + Load Video feature

- recorder.py: Fixed Windows FFmpeg handling
  - Use CTRL_BREAK_EVENT instead of stdin 'q' to stop recording
  - Use NtSuspendProcess/NtResumeProcess for pause/resume
  - Add stderr reader thread to prevent blocking
  - Better error handling and status messages
  - Check if recording actually started before returning success

- gui_capture.py: Added Load Video feature
  - New 'Load' button in header to import existing videos
  - Copies video to session folder and runs transcription
  - Supports mp4, mkv, avi, mov, webm formats

Fixes:
- Record button now properly enables Pause button
- Recording stops cleanly on Windows
- Can load pre-recorded videos (from OBS, etc.)
This commit is contained in:
Mario Lavoie
2026-02-10 18:14:01 +00:00
parent 9b24478f04
commit d998a9a2b1
2 changed files with 227 additions and 42 deletions

View File

@@ -190,6 +190,18 @@ class KBCaptureGUI:
command=self._browse_folder,
).pack(side="right")
# Load video button
CTkButton(
header,
text="📂 Load",
width=60,
height=32,
font=("", 10),
fg_color=COLORS["bg_card"],
hover_color=COLORS["bg_elevated"],
command=self._load_video,
).pack(side="right", padx=(0, 8))
# Timer card
timer_frame = CTkFrame(main, fg_color=COLORS["bg_card"], corner_radius=10)
timer_frame.pack(fill="x", pady=(0, 12))
@@ -498,6 +510,70 @@ class KBCaptureGUI:
name_entry.bind("<Return>", lambda e: create())
def _load_video(self):
    """Import an existing video file into a new session and transcribe it.

    Steps:
      1. Refuse to run when no app is attached, the app is not idle, or the
         project menu still shows a placeholder entry (one starting with "(").
      2. Ask the user for a video file; do nothing if the dialog is cancelled.
      3. Start a session named after the name entry (or the file stem when
         the entry is empty) and copy the file into the session directory
         as ``recording.mp4``.
      4. Mark the app as transcribing, disable the record button and run
         ``self.app._transcribe`` on the copy in a daemon thread.

    All problems are reported through message boxes; nothing is returned.
    """
    import shutil

    if not self.app:
        messagebox.showwarning("No Folder", "Select a projects folder first")
        return
    if self.app.state != AppState.IDLE:
        messagebox.showwarning("Busy", "Stop current recording first")
        return

    project = self.project_menu.get()
    if project.startswith("("):
        messagebox.showwarning("No Project", "Select a project first")
        return

    # Ask for the video file; an empty result means the dialog was cancelled.
    chosen = filedialog.askopenfilename(
        title="Select Video to Process",
        filetypes=[
            ("Video files", "*.mp4 *.mkv *.avi *.mov *.webm"),
            ("All files", "*.*"),
        ],
    )
    if not chosen:
        return

    video_path = Path(chosen)
    # is_file() rather than exists(): a directory must not reach the copy step.
    if not video_path.is_file():
        messagebox.showerror("Error", "File not found")
        return

    name = self.name_entry.get().strip() or video_path.stem
    if self.type_var.get() == "design":
        session_type = SessionType.DESIGN
    else:
        session_type = SessionType.ANALYSIS

    # Create the session folder that will receive the copy.
    self.app.session_manager.start_session(name, project, session_type)
    session_dir = self.app.session_manager.get_session_dir()
    dest_video = session_dir / "recording.mp4"

    self.status_label.configure(text="Copying video...", text_color=COLORS["orange"])
    self.window.update()  # repaint now; the copy below blocks the UI thread

    try:
        shutil.copy2(video_path, dest_video)
    except Exception as e:
        messagebox.showerror("Error", f"Failed to copy video: {e}")
        # Do not leave the label claiming a copy is still in progress.
        self.status_label.configure(text="Copy failed", text_color=COLORS["orange"])
        self.app.session_manager.cancel_session()
        return

    # Now transcribe.
    self.status_label.configure(text="Transcribing...", text_color=COLORS["orange"])
    self.record_btn.configure(state="disabled")
    self.window.update()

    # Flag the state before the worker starts so the app is never seen as
    # idle while transcription is pending.
    self.app.state = AppState.TRANSCRIBING
    threading.Thread(
        target=self.app._transcribe,
        args=(dest_video,),
        daemon=True,
    ).start()
def _toggle_recording(self):
"""Start or stop recording."""
if not self.app:

View File

@@ -9,6 +9,8 @@ import subprocess
import threading
import time
import sys
import os
import signal
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, Callable
@@ -50,21 +52,29 @@ class ScreenRecorder:
self.total_paused: float = 0.0
self.output_path: Optional[Path] = None
self.on_status = on_status or (lambda x: None)
self._stderr_thread: Optional[threading.Thread] = None
self._last_error: str = ""
def _get_ffmpeg_cmd(self, config: RecordingConfig) -> list[str]:
"""Build FFmpeg command."""
cmd = ["ffmpeg", "-y"]
if sys.platform == "win32":
# Windows: gdigrab for screen
# Windows: gdigrab for screen capture
cmd.extend([
"-f", "gdigrab",
"-framerate", str(config.framerate),
"-draw_mouse", "1",
"-i", "desktop",
])
# Audio: dshow
audio_device = config.audio_device or "Microphone Array"
# Audio: dshow - try to find a working device
audio_device = config.audio_device
if not audio_device:
devices = self.list_audio_devices()
audio_device = devices[0] if devices else None
if audio_device:
cmd.extend([
"-f", "dshow",
"-i", f"audio={audio_device}",
@@ -84,13 +94,32 @@ class ScreenRecorder:
"-c:v", config.video_codec,
"-preset", config.preset,
"-crf", str(config.crf),
"-c:a", config.audio_codec,
"-b:a", "128k",
str(config.output_path),
"-pix_fmt", "yuv420p", # Ensure compatibility
])
# Only add audio codec if we have audio input
if sys.platform != "win32" or config.audio_device or self.list_audio_devices():
cmd.extend([
"-c:a", config.audio_codec,
"-b:a", "128k",
])
cmd.append(str(config.output_path))
return cmd
def _read_stderr(self):
"""Read stderr in background to prevent blocking."""
if not self.process or not self.process.stderr:
return
try:
for line in self.process.stderr:
if isinstance(line, bytes):
line = line.decode('utf-8', errors='ignore')
self._last_error = line.strip()
except:
pass
def start(self, config: RecordingConfig) -> bool:
"""Start recording."""
if self.is_recording:
@@ -101,15 +130,36 @@ class ScreenRecorder:
self.output_path.parent.mkdir(parents=True, exist_ok=True)
cmd = self._get_ffmpeg_cmd(config)
self.on_status(f"Starting: {' '.join(cmd[:6])}...")
try:
# On Windows, use different process creation flags
if sys.platform == "win32":
# CREATE_NEW_PROCESS_GROUP allows sending CTRL_BREAK_EVENT
self.process = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
creationflags=subprocess.CREATE_NO_WINDOW if sys.platform == "win32" else 0,
creationflags=subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.CREATE_NO_WINDOW,
)
else:
self.process = subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
# Start stderr reader thread
self._stderr_thread = threading.Thread(target=self._read_stderr, daemon=True)
self._stderr_thread.start()
# Wait a moment to see if FFmpeg crashes immediately
time.sleep(0.5)
if self.process.poll() is not None:
self.on_status(f"FFmpeg failed: {self._last_error}")
return False
self.is_recording = True
self.is_paused = False
@@ -120,10 +170,10 @@ class ScreenRecorder:
return True
except FileNotFoundError:
self.on_status("FFmpeg not found")
self.on_status("FFmpeg not found - install from ffmpeg.org")
return False
except Exception as e:
self.on_status(f"Failed: {e}")
self.on_status(f"Failed to start: {e}")
return False
def pause(self) -> bool:
@@ -140,11 +190,20 @@ class ScreenRecorder:
import ctypes
kernel32 = ctypes.windll.kernel32
handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
kernel32.DebugActiveProcess(self.process.pid)
if handle:
# Use NtSuspendProcess for cleaner suspension
ntdll = ctypes.windll.ntdll
ntdll.NtSuspendProcess(handle)
kernel32.CloseHandle(handle)
self.on_status("Paused")
except:
self.on_status("Paused (soft)")
else:
self.on_status("Paused (soft)")
except Exception as e:
self.on_status(f"Paused (soft): {e}")
else:
# On Linux, send SIGSTOP
if self.process:
self.process.send_signal(signal.SIGSTOP)
self.on_status("Paused")
return True
@@ -160,14 +219,22 @@ class ScreenRecorder:
self.is_paused = False
self.pause_start = None
# Resume FFmpeg process on Windows
# Resume FFmpeg process
if sys.platform == "win32" and self.process:
try:
import ctypes
kernel32 = ctypes.windll.kernel32
kernel32.DebugActiveProcessStop(self.process.pid)
handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
if handle:
ntdll = ctypes.windll.ntdll
ntdll.NtResumeProcess(handle)
kernel32.CloseHandle(handle)
except:
pass
else:
# On Linux, send SIGCONT
if self.process:
self.process.send_signal(signal.SIGCONT)
self.on_status("Recording resumed")
return True
@@ -177,32 +244,62 @@ class ScreenRecorder:
if not self.is_recording or not self.process:
return None
# If paused, add final pause duration
if self.is_paused and self.pause_start:
# If paused, resume first so we can stop properly
if self.is_paused:
if self.pause_start:
self.total_paused += time.time() - self.pause_start
# Resume first so we can stop properly
if sys.platform == "win32":
try:
import ctypes
kernel32 = ctypes.windll.kernel32
kernel32.DebugActiveProcessStop(self.process.pid)
handle = kernel32.OpenProcess(0x1F0FFF, False, self.process.pid)
if handle:
ntdll = ctypes.windll.ntdll
ntdll.NtResumeProcess(handle)
kernel32.CloseHandle(handle)
except:
pass
else:
self.process.send_signal(signal.SIGCONT)
time.sleep(0.2) # Give it a moment to resume
try:
# Send 'q' to FFmpeg
if sys.platform == "win32":
# On Windows, send CTRL_BREAK_EVENT to gracefully stop FFmpeg
# This works because we used CREATE_NEW_PROCESS_GROUP
try:
os.kill(self.process.pid, signal.CTRL_BREAK_EVENT)
self.process.wait(timeout=5)
except (subprocess.TimeoutExpired, OSError):
# Fallback: try stdin 'q'
try:
if self.process.stdin:
self.process.stdin.write(b"q")
self.process.stdin.write(b"q\n")
self.process.stdin.flush()
self.process.wait(timeout=5)
except:
self.process.terminate()
self.process.wait(timeout=3)
else:
# On Linux, send SIGINT (Ctrl+C equivalent)
self.process.send_signal(signal.SIGINT)
self.process.wait(timeout=10)
except subprocess.TimeoutExpired:
self.on_status("Timeout - forcing stop")
self.process.terminate()
try:
self.process.wait(timeout=5)
except:
self.process.kill()
except Exception as e:
self.on_status(f"Stop error: {e}")
try:
self.process.terminate()
except:
pass
self.is_recording = False
self.is_paused = False
@@ -210,7 +307,15 @@ class ScreenRecorder:
duration = self.get_duration()
self.on_status(f"Stopped: {duration:.1f}s")
return self.output_path if self.output_path and self.output_path.exists() else None
# Check if output file exists and has content
if self.output_path and self.output_path.exists():
if self.output_path.stat().st_size > 1000: # At least 1KB
return self.output_path
else:
self.on_status("Recording too short or failed")
return None
return None
def get_duration(self) -> float:
"""Get actual recording duration (excluding pauses)."""
@@ -244,6 +349,7 @@ class ScreenRecorder:
capture_output=True,
text=True,
creationflags=subprocess.CREATE_NO_WINDOW,
timeout=10,
)
devices = []
@@ -257,9 +363,12 @@ class ScreenRecorder:
start = line.find('"') + 1
end = line.rfind('"')
if start < end:
devices.append(line[start:end])
device = line[start:end]
# Skip virtual/system devices that don't work well
if "virtual" not in device.lower():
devices.append(device)
return devices if devices else ["Microphone Array"]
return devices if devices else []
except:
return ["Microphone Array"]
return []