diff --git a/Tool_Scripts/Whisper_Transcript/Android-Port-Brainstorm.md b/Tool_Scripts/Whisper_Transcript/Android-Port-Brainstorm.md new file mode 100644 index 0000000..d751739 --- /dev/null +++ b/Tool_Scripts/Whisper_Transcript/Android-Port-Brainstorm.md @@ -0,0 +1,256 @@ +# Voice Recorder Android Port - Brainstorm + +## Current App Features to Port + +1. **Audio Recording** - Record with pause/resume +2. **Whisper Transcription** - Local AI transcription +3. **Note Type Selection** - Meeting, Todo, Idea, Review, Journal +4. **Obsidian Export** - Markdown files with YAML frontmatter +5. **Claude Processing** - AI-powered note organization +6. **Folder Organization** - Auto-sort by note type + +--- + +## Approach Options + +### Option 1: React Native + Expo (Recommended) + +**Pros:** +- Cross-platform (iOS + Android) +- Large ecosystem, good documentation +- Hot reload for fast development +- Can use `expo-av` for audio recording +- Good integration with file systems + +**Cons:** +- Whisper would need cloud API or native module +- Claude CLI not available, would need API + +**Stack:** +``` +- React Native / Expo +- expo-av (recording) +- expo-file-system (file management) +- OpenAI Whisper API or whisper.cpp native module +- Claude API (not CLI) +- Obsidian sync via shared folder or plugin +``` + +**Effort:** Medium (2-3 weeks for MVP) + +--- + +### Option 2: Flutter + +**Pros:** +- Single codebase for Android/iOS +- Fast performance with native compilation +- Good audio packages (record, just_audio) +- Material Design 3 built-in + +**Cons:** +- Dart learning curve +- Whisper integration more complex +- Smaller ecosystem than React Native + +**Stack:** +``` +- Flutter / Dart +- record package (audio) +- whisper_flutter or cloud API +- Claude API +- path_provider for file storage +``` + +**Effort:** Medium-High (3-4 weeks for MVP) + +--- + +### Option 3: Native Kotlin (Android Only) + +**Pros:** +- Best performance +- Full Android API access +- Can integrate 
whisper.cpp directly +- Better battery optimization +- Works offline + +**Cons:** +- Android only (no iOS) +- More code to maintain +- Longer development time + +**Stack:** +``` +- Kotlin + Jetpack Compose +- MediaRecorder API +- whisper.cpp via JNI (local transcription) +- Claude API +- Storage Access Framework for Obsidian folder +``` + +**Effort:** High (4-6 weeks for MVP) + +--- + +### Option 4: PWA (Progressive Web App) + +**Pros:** +- Works on any device with browser +- No app store needed +- Shared codebase with potential web app +- Easy updates + +**Cons:** +- Limited audio recording capabilities +- No background processing +- Can't access file system directly +- Requires internet for Whisper + +**Stack:** +``` +- Vue.js or React +- MediaRecorder Web API +- Whisper API (cloud) +- Claude API +- Download files or sync via Obsidian plugin +``` + +**Effort:** Low-Medium (1-2 weeks for MVP) + +--- + +## Whisper Integration Options + +### Cloud-based (Easier) +1. **OpenAI Whisper API** - $0.006/min, reliable +2. **Replicate** - Pay per use, hosted models +3. **Self-hosted** - Run whisper on home server/NAS + +### On-device (Harder but offline) +1. **whisper.cpp** - C++ port, works on Android via JNI +2. **whisper-android** - Pre-built Android bindings +3. **ONNX Runtime** - Run whisper.onnx model + +**Recommendation:** Start with OpenAI API, add offline later + +--- + +## Obsidian Sync Options + +### Option A: Direct File Access +- Use Android's Storage Access Framework +- User grants access to Obsidian vault folder +- Write markdown files directly +- Works with Obsidian Sync, Syncthing, etc. 
+ +### Option B: Obsidian Plugin +- Create companion plugin for Obsidian +- App sends notes via local HTTP server +- Plugin receives and saves notes +- More complex but cleaner UX + +### Option C: Share Intent +- Use Android share functionality +- Share transcribed note to Obsidian +- User manually saves +- Simplest but requires user action + +**Recommendation:** Option A (direct file access) + +--- + +## Recommended MVP Approach + +### Phase 1: Core Recording (Week 1) +- React Native + Expo setup +- Basic UI matching desktop app style +- Audio recording with pause/resume +- Timer display +- Note type selection + +### Phase 2: Transcription (Week 2) +- OpenAI Whisper API integration +- Loading states and error handling +- Transcript preview + +### Phase 3: Export & Processing (Week 3) +- File system access setup +- Markdown generation +- Claude API integration +- Folder organization + +### Phase 4: Polish (Week 4) +- Offline queue for transcription +- Settings screen +- Obsidian folder picker +- Widget for quick recording + +--- + +## Technical Considerations for Pixel 7 + +### Hardware Advantages +- Tensor G2 chip - could run small whisper models +- Good microphone array +- Large battery + +### Android-Specific Features +- Material You theming +- Quick Settings tile +- Home screen widget +- Voice Assistant integration potential + +--- + +## Alternative: Termux + Python + +For a quick hack without building a full app: + +```bash +# Install Termux from F-Droid +pkg install python +pip install openai-whisper sounddevice + +# Run existing Python script (modified) +python voice_recorder_android.py +``` + +**Pros:** Reuse existing code, fast to test +**Cons:** Requires Termux, not user-friendly + +--- + +## Decision Matrix + +| Criteria | React Native | Flutter | Kotlin | PWA | +|----------|-------------|---------|--------|-----| +| Dev Speed | Fast | Medium | Slow | Fastest | +| Performance | Good | Great | Best | OK | +| Offline | Possible | Possible | Yes | No | +| 
iOS Support | Yes | Yes | No | Yes | +| Learning Curve | Low | Medium | Medium | Low | +| Maintenance | Easy | Easy | More | Easy | + +--- + +## Recommended Path + +1. **Start with React Native + Expo** for fastest MVP +2. **Use OpenAI Whisper API** initially +3. **Direct file access** to Obsidian vault +4. **Claude API** (not CLI) for processing +5. **Add offline whisper.cpp** later if needed + +This approach gets a working app fastest while leaving room for optimization. + +--- + +## Next Steps + +- [ ] Set up React Native + Expo project +- [ ] Design mobile UI mockups +- [ ] Get OpenAI API key for Whisper +- [ ] Get Claude API key +- [ ] Test file system access on Pixel 7 +- [ ] Create basic recording prototype diff --git a/Tool_Scripts/Whisper_Transcript/Transcribe.bat b/Tool_Scripts/Whisper_Transcript/Transcribe.bat new file mode 100644 index 0000000..519b9c9 --- /dev/null +++ b/Tool_Scripts/Whisper_Transcript/Transcribe.bat @@ -0,0 +1,102 @@ +@echo off +setlocal enabledelayedexpansion + +:: ============================================ +:: CONFIGURATION - Edit these paths as needed +:: ============================================ +set "OUTPUT_DIR=C:\Users\antoi\antoine\My Libraries\Antoine Brain Extension\+\Transcripts" +set "CONDA_ENV=test_env" +set "CONDA_PATH=C:\Users\antoi\anaconda3\Scripts\activate.bat" + +:: ============================================ +:: MAIN SCRIPT - No edits needed below +:: ============================================ + +:: Check if file was dragged onto script +if "%~1"=="" ( + echo. + echo ======================================== + echo Voice Memo Transcriber + echo ======================================== + echo. + echo Drag an audio file onto this script! + echo Or paste the full path below: + echo. 
+ set /p "AUDIO_FILE=File path: " +) else ( + set "AUDIO_FILE=%~1" +) + +:: Generate timestamp for filename +for /f "tokens=1-5 delims=/:.- " %%a in ("%date% %time%") do ( + set "TIMESTAMP=%%c-%%a-%%b %%d-%%e" +) + +set "NOTE_NAME=Voice Note %TIMESTAMP%.md" +set "TEMP_FILE=%TEMP%\whisper_output.txt" + +echo. +echo ======================================== +echo Transcribing: %AUDIO_FILE% +echo Output: %NOTE_NAME% +echo ======================================== +echo. +echo This may take a few minutes for long recordings... +echo. + +:: Activate conda environment and run whisper +call %CONDA_PATH% %CONDA_ENV% +insanely-fast-whisper --file-name "%AUDIO_FILE%" --transcript-path "%TEMP_FILE%" --model-name openai/whisper-large-v3 + +:: Check if transcription succeeded +if not exist "%TEMP_FILE%" ( + echo. + echo ERROR: Transcription failed! + echo Check that the audio file exists and is valid. + echo. + pause + exit /b 1 +) + +:: Create markdown note with YAML frontmatter +echo --- > "%OUTPUT_DIR%\%NOTE_NAME%" +echo created: %date% %time:~0,5% >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo type: voice-note >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo status: raw >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo tags: >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo - transcript >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo - voice-memo >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo --- >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo # Voice Note - %date% at %time:~0,5% >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo ## Metadata >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo - **Source file:** `%~nx1` >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo - **Transcribed:** %date% %time:~0,5% >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo --- >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo ## Raw Transcript >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +type "%TEMP_FILE%" >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. 
>> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo --- >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo ## Notes distillees >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo. >> "%OUTPUT_DIR%\%NOTE_NAME%" +echo ^ + +``` + +--- + +## Processing with Claude + +After transcription, use this prompt template to organize your notes: + +``` +Voici un transcript de notes vocales en français/anglais. +Peux-tu: + +1. Corriger les erreurs de transcription évidentes +2. Organiser par thèmes/sujets +3. Extraire les points clés et action items +4. Reformatter en notes structurées + +Garde le contenu original mais rends-le plus lisible. + +--- + +[COLLER LE TRANSCRIPT ICI] +``` + +--- + +## Troubleshooting + +### "conda is not recognized" +- Verify conda path: `where conda` +- Update `CONDA_PATH` in the script to match your installation + +### Transcription takes too long +- The `large-v3` model is accurate but slow on CPU +- For faster (less accurate) results, change model to: + ``` + --model-name openai/whisper-medium + ``` + or + ``` + --model-name openai/whisper-small + ``` + +### GPU acceleration +If you have an NVIDIA GPU, install CUDA support: +```bash +pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +``` + +### Wrong language detected +Add language hint to the whisper command: +```bash +insanely-fast-whisper --file-name "audio.mp3" --transcript-path "output.txt" --model-name openai/whisper-large-v3 --language fr +``` + +--- + +## Alternative: Python Script Version + +For more control or integration with other tools: + +**File:** `transcribe.py` + +```python +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +# Configuration +OUTPUT_DIR = Path(r"C:\Users\antoi\antoine\My Libraries\Antoine Brain Extension\+\Transcripts") +MODEL = "openai/whisper-large-v3" + +def transcribe(audio_path: str): + audio_file = Path(audio_path) + timestamp = 
datetime.now().strftime("%Y-%m-%d %H-%M") + note_name = f"Voice Note {timestamp}.md" + temp_file = Path.home() / "AppData/Local/Temp/whisper_output.txt" + + print(f"\n🎙️ Transcribing: {audio_file.name}") + print(f"📝 Output: {note_name}\n") + + # Run whisper + subprocess.run([ + "insanely-fast-whisper", + "--file-name", str(audio_file), + "--transcript-path", str(temp_file), + "--model-name", MODEL + ]) + + # Read transcript + transcript = temp_file.read_text(encoding="utf-8") + + # Create markdown note + note_content = f"""--- +created: {datetime.now().strftime("%Y-%m-%d %H:%M")} +type: voice-note +status: raw +tags: + - transcript + - voice-memo +--- + +# Voice Note - {datetime.now().strftime("%Y-%m-%d")} at {datetime.now().strftime("%H:%M")} + +## Metadata + +- **Source file:** `{audio_file.name}` +- **Transcribed:** {datetime.now().strftime("%Y-%m-%d %H:%M")} + +--- + +## Raw Transcript + +{transcript} + +--- + +## Notes distillees + + + +""" + + output_path = OUTPUT_DIR / note_name + output_path.write_text(note_content, encoding="utf-8") + + print(f"\n✅ Done! 
Created: {note_name}") + print(f"📁 Location: {OUTPUT_DIR}") + +if __name__ == "__main__": + if len(sys.argv) > 1: + transcribe(sys.argv[1]) + else: + audio = input("Enter audio file path: ").strip('"') + transcribe(audio) +``` + +Run with: +```bash +conda activate test_env +python transcribe.py "path/to/audio.mp3" +``` + +--- + +## Next Steps + +- [ ] Install `insanely-fast-whisper` in `test_env` +- [ ] Save `Transcribe.bat` to Desktop +- [ ] Test with a short audio clip +- [ ] Pin to taskbar for quick access +- [ ] Set up Claude prompt template for processing + +--- + +## Resources + +- [insanely-fast-whisper GitHub](https://github.com/Vaibhavs10/insanely-fast-whisper) +- [OpenAI Whisper](https://github.com/openai/whisper) +- [Whisper model comparison](https://github.com/openai/whisper#available-models-and-languages) diff --git a/Tool_Scripts/Whisper_Transcript/create_icon.py b/Tool_Scripts/Whisper_Transcript/create_icon.py new file mode 100644 index 0000000..edfa51f --- /dev/null +++ b/Tool_Scripts/Whisper_Transcript/create_icon.py @@ -0,0 +1,131 @@ +""" +Generate a custom icon for the Voice Recorder application. +Creates a modern microphone icon with the app's color scheme. 
def create_voice_recorder_icon():
    """Render the Voice Recorder microphone icon and save it as a multi-size ICO.

    One RGBA frame is drawn per entry in ``sizes`` (a dark disc, a red
    microphone capsule, and a purple stand), then all frames are written to
    ``voice_recorder.ico`` next to this script.

    Returns:
        str: Absolute path of the ``.ico`` file that was written.
    """
    # Icon sizes for Windows ICO (multiple sizes), smallest to largest.
    sizes = [16, 32, 48, 64, 128, 256]

    # Colors matching the app theme
    bg_color = (13, 17, 23)          # #0d1117
    accent_red = (248, 81, 73)       # #f85149
    accent_purple = (163, 113, 247)  # #a371f7

    images = []

    for size in sizes:
        # Start from a fully transparent canvas so the disc has soft corners.
        img = Image.new('RGBA', (size, size), (0, 0, 0, 0))
        draw = ImageDraw.Draw(img)

        # Circular background
        padding = int(size * 0.05)
        draw.ellipse(
            [padding, padding, size - padding, size - padding],
            fill=bg_color
        )

        # All geometry below is proportional to the frame size.
        center_x = size // 2

        # Microphone body bounding box
        mic_width = int(size * 0.28)
        mic_height = int(size * 0.38)
        mic_top = int(size * 0.18)
        mic_left = center_x - mic_width // 2
        mic_right = center_x + mic_width // 2
        mic_bottom = mic_top + mic_height

        # Microphone head: top cap + rectangle + bottom cap form a pill shape.
        radius = mic_width // 2
        draw.ellipse(
            [mic_left, mic_top, mic_right, mic_top + mic_width],
            fill=accent_red
        )
        draw.rectangle(
            [mic_left, mic_top + radius, mic_right, mic_bottom],
            fill=accent_red
        )
        draw.ellipse(
            [mic_left, mic_bottom - radius, mic_right, mic_bottom + radius],
            fill=accent_red
        )

        # Grille lines are only legible on the larger frames.
        if size >= 48:
            line_margin = int(mic_width * 0.25)
            grille_width = max(1, size // 32)
            for fraction in (0.35, 0.55, 0.75):
                line_y = mic_top + int(mic_height * fraction)
                draw.line(
                    [(mic_left + line_margin, line_y), (mic_right - line_margin, line_y)],
                    fill=bg_color,
                    width=grille_width
                )

        # Microphone stand: a half-ellipse cradle below the capsule.
        stand_top = mic_bottom + int(size * 0.02)
        stand_width = int(size * 0.4)
        stand_left = center_x - stand_width // 2
        stand_right = center_x + stand_width // 2

        arc_height = int(size * 0.12)
        draw.arc(
            [stand_left, stand_top - arc_height, stand_right, stand_top + arc_height],
            start=0,
            end=180,
            fill=accent_purple,
            width=max(2, size // 16)
        )

        # Vertical post
        stand_line_top = stand_top + arc_height // 2
        stand_line_bottom = int(size * 0.78)
        line_width = max(2, size // 16)
        draw.line(
            [(center_x, stand_line_top), (center_x, stand_line_bottom)],
            fill=accent_purple,
            width=line_width
        )

        # Horizontal base
        base_width = int(size * 0.3)
        base_y = stand_line_bottom
        draw.line(
            [(center_x - base_width // 2, base_y), (center_x + base_width // 2, base_y)],
            fill=accent_purple,
            width=line_width
        )

        images.append(img)

    # Save as ICO next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    ico_path = os.path.join(script_dir, "voice_recorder.ico")

    # Pillow's ICO writer ignores any requested size larger than the image
    # being saved, so the base image must be the LARGEST frame; saving from the
    # 16x16 frame would produce an icon containing only that one size. The
    # remaining hand-drawn frames go in ``append_images`` so they are used in
    # place of automatically downscaled copies.
    largest = max(images, key=lambda frame: frame.width)
    others = [frame for frame in images if frame is not largest]
    largest.save(
        ico_path,
        format='ICO',
        sizes=[(s, s) for s in sizes],
        append_images=others
    )

    print(f"Icon created: {ico_path}")
    return ico_path
def transcribe(audio_path: str):
    """Transcribe an audio file and save the result as a Markdown voice note.

    Runs the ``insanely-fast-whisper`` command-line tool on ``audio_path``,
    reads back the transcript it wrote, wraps it in a note with YAML
    frontmatter, and writes that note into ``OUTPUT_DIR``.

    Args:
        audio_path: Path to the audio file to transcribe.

    Raises:
        FileNotFoundError: If ``audio_path`` is not an existing file, or the
            ``insanely-fast-whisper`` executable cannot be found.
        subprocess.CalledProcessError: If the transcription command exits with
            a non-zero status.
    """
    import tempfile  # function-scope import: only this function needs it

    audio_file = Path(audio_path)
    # Fail fast with a clear message instead of after a slow model start-up.
    if not audio_file.is_file():
        raise FileNotFoundError(f"Audio file not found: {audio_file}")

    # Take the clock once so the filename, frontmatter and heading all agree
    # even if the run straddles a minute boundary.
    now = datetime.now()
    day = now.strftime("%Y-%m-%d")
    clock = now.strftime("%H:%M")
    note_name = f"Voice Note {day} {now.strftime('%H-%M')}.md"

    print(f"\n🎙️ Transcribing: {audio_file.name}")
    print(f"📝 Output: {note_name}\n")

    # A fresh private directory guarantees we never read a stale transcript
    # left behind by an earlier run, and works on any platform (the previous
    # path assumed the default Windows AppData layout).
    with tempfile.TemporaryDirectory() as work_dir:
        temp_file = Path(work_dir) / "whisper_output.txt"

        # Run whisper; check=True turns a failed transcription into an error
        # instead of continuing with missing output.
        subprocess.run(
            [
                "insanely-fast-whisper",
                "--file-name", str(audio_file),
                "--transcript-path", str(temp_file),
                "--model-name", MODEL,
            ],
            check=True,
        )

        # Read transcript before the temporary directory is removed.
        transcript = temp_file.read_text(encoding="utf-8")

    # Create markdown note
    note_content = f"""---
created: {day} {clock}
type: voice-note
status: raw
tags:
  - transcript
  - voice-memo
---

# Voice Note - {day} at {clock}

## Metadata

- **Source file:** `{audio_file.name}`
- **Transcribed:** {day} {clock}

---

## Raw Transcript

{transcript}

---

## Notes distillees



"""

    # The vault folder may not exist yet on a fresh machine.
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    output_path = OUTPUT_DIR / note_name
    output_path.write_text(note_content, encoding="utf-8")

    print(f"\n✅ Done! Created: {note_name}")
    print(f"📁 Location: {OUTPUT_DIR}")