🎤 AnythingLLM – Local Whisper Recording Integration
Add a Record button to AnythingLLM that lets you dictate messages using your microphone. The audio is sent to your own local Whisper server for transcription — no OpenAI API required.
🧩 Requirements
- A running Whisper ASR endpoint (example: https://whisper.parsons.familyds.net/asr)
- A supported desktop browser: Chrome, Firefox, Edge, or Brave
- The Tampermonkey browser extension installed
⚙️ Installation Steps
- Install the Tampermonkey extension for your browser.
- Click the Tampermonkey icon → Create a new script…
- Delete any existing placeholder code.
- Paste the full script below.
- Save the script (File → Save or Ctrl+S).
- Visit your AnythingLLM site (or refresh it). You should see a 🎤 Record button next to the chat input.
- Click once to record, click again to stop — your transcribed text appears automatically. Hold Shift while stopping to auto-send.
🧾 Full Script — AnythingLLM 🎤 Record (v1.3)
// ==UserScript==
// @name AnythingLLM – 🎤 Record (local Whisper)
// @namespace https://parsons.family
// @version 1.3
// @description Adds a mic record button to AnythingLLM; transcribes via local Whisper; no OpenAI.
// @match https://anythingllm.parsons.familyds.net/*
// @match http://localhost:3001/*
// @match http://192.168.1.200:3001/*
// @match http://ai-server:3001/*
// @grant none
// @run-at document-idle
// ==/UserScript==
(function () {
'use strict';
// Endpoint of the local Whisper ASR server. Point this at any
// Whisper-compatible transcription service (see Usage notes below).
const ASR_URL = 'https://whisper.parsons.familyds.net/asr';
// Native prototype `value` setters, captured once so programmatic writes
// bypass React's value tracking and are picked up as real user edits.
const textareaSetter =
Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value')?.set;
const inputSetter =
Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value')?.set;
/**
 * Set an element's value so React (and similar frameworks) notice the change.
 * Uses the native prototype setters captured above for textarea/input, then
 * dispatches beforeinput/input/change so listeners update their state.
 * @param {HTMLElement} el - textarea, input, or contenteditable element.
 * @param {string} val - the full replacement value.
 */
function setReactValue(el, val) {
if (el instanceof HTMLTextAreaElement && textareaSetter) textareaSetter.call(el, val);
else if (el instanceof HTMLInputElement && inputSetter) inputSetter.call(el, val);
// Fix: this branch was an empty `{}` — contenteditable targets silently
// dropped the text while the events below still fired. Write the text.
else if (el.isContentEditable) el.textContent = val;
else el.value = val;
try {
el.dispatchEvent(new InputEvent('beforeinput', {
inputType: 'insertText', data: val, bubbles: true, cancelable: true, composed: true
}));
} catch {} // InputEvent may be unavailable; input/change below still fire.
el.dispatchEvent(new Event('input', { bubbles: true, cancelable: true, composed: true }));
el.dispatchEvent(new Event('change', { bubbles: true }));
}
// Read the current text of a supported input element; '' when unsupported.
function getValue(el) {
const isFormField =
el instanceof HTMLTextAreaElement || el instanceof HTMLInputElement;
if (isFormField) return el.value || '';
return el.isContentEditable ? (el.innerText || '') : '';
}
// Insert text at the caret inside a contenteditable element, then emit the
// beforeinput/input/change events that frameworks listen for.
function insertIntoContentEditable(el, text) {
el.focus();
const doc = el.ownerDocument;
const node = doc.createTextNode(text);
const sel = doc.getSelection();
if (sel && sel.rangeCount > 0) {
const range = sel.getRangeAt(0);
range.deleteContents();
range.insertNode(node);
range.collapse(false); // place caret after the inserted text
sel.removeAllRanges();
sel.addRange(range);
} else {
// No selection available — append at the end instead.
el.appendChild(node);
}
el.dispatchEvent(new InputEvent('beforeinput', {
inputType: 'insertText', data: text, bubbles: true, cancelable: true, composed: true
}));
el.dispatchEvent(new Event('input', { bubbles: true }));
el.dispatchEvent(new Event('change', { bubbles: true }));
}
// Heuristic visibility check: the element must be rendered and large enough
// (wider than 200px, taller than 30px) to plausibly be the main chat input.
function isVisible(el) {
const style = getComputedStyle(el);
if (style.display === 'none' || style.visibility === 'hidden') return false;
const rect = el.getBoundingClientRect();
return rect.width > 200 && rect.height > 30;
}
// Sort comparator: orders elements by on-screen area, largest first.
function byArea(a, b) {
const areaOf = (el) => {
const rect = el.getBoundingClientRect();
return rect.width * rect.height;
};
return areaOf(b) - areaOf(a);
}
// Collect plausible chat-input elements in the given document: known
// textarea/contenteditable selectors, visible ones only, largest first.
function queryAllCandidates(rootDoc) {
const selectors = [
'textarea[placeholder*="message" i]',
'textarea[aria-label*="message" i]',
'textarea[aria-multiline="true"]',
'textarea',
'[role="textbox"][contenteditable="true"]',
'.ProseMirror[contenteditable="true"]',
'.ql-editor[contenteditable="true"]'
];
const matches = selectors.flatMap(sel => Array.from(rootDoc.querySelectorAll(sel)));
return matches.filter(isVisible).sort(byArea);
}
// Locate the chat input in the top document first, then inside any
// same-origin iframe. Returns { el, doc } or null when nothing matches.
function findChatInput() {
const inTop = queryAllCandidates(document);
if (inTop.length > 0) return { el: inTop[0], doc: document };
for (const frame of Array.from(document.querySelectorAll('iframe'))) {
try {
const frameDoc = frame.contentDocument;
if (!frameDoc) continue;
const inFrame = queryAllCandidates(frameDoc);
if (inFrame.length > 0) return { el: inFrame[0], doc: frameDoc };
} catch {} // cross-origin frames throw on access; skip them
}
return null;
}
// Inject the 🎤 Record button directly after the chat input. Idempotent:
// bails out if the button already exists or no input is found yet.
function ensureButton() {
if (document.getElementById('whisper-record-btn')) return;
const found = findChatInput();
if (!found) return;
const input = found.el;
const btn = document.createElement('button');
btn.id = 'whisper-record-btn';
btn.type = 'button';
btn.textContent = '🎤 Record';
const styles = {
marginLeft: '8px',
padding: '6px 10px',
borderRadius: '10px',
border: '1px solid #ccc',
cursor: 'pointer',
fontSize: '0.95rem'
};
Object.assign(btn.style, styles);
input.insertAdjacentElement('afterend', btn);
wireRecorder(btn, input);
console.log('🎤 Record button injected next to', input);
}
/**
 * Wire the record button: first click starts capture, second click stops it
 * and sends the audio for transcription. Holding Shift on the stopping click
 * auto-sends the transcribed message.
 * @param {HTMLButtonElement} btn - the injected record button.
 * @param {HTMLElement} inputEl - fallback chat input for the transcript.
 */
function wireRecorder(btn, inputEl) {
let rec = null;
let chunks = [];
let stoppingEvent = null; // click event captured at stop time (for shiftKey)
btn.addEventListener('click', async (ev) => {
if (!rec) {
let stream = null;
try {
stream = await navigator.mediaDevices.getUserMedia({ audio: true });
chunks = [];
// Fix: only request audio/webm when supported; the original passed it
// unconditionally and the constructor throws where it isn't.
const opts = (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported('audio/webm'))
? { mimeType: 'audio/webm' } : undefined;
rec = new MediaRecorder(stream, opts);
rec.ondataavailable = e => e.data && e.data.size && chunks.push(e.data);
rec.onstop = () => doTranscribe(btn, inputEl, chunks, stoppingEvent).finally(() => {
rec.stream.getTracks().forEach(t => t.stop());
rec = null; chunks = []; stoppingEvent = null;
btn.disabled = false; btn.textContent = '🎤 Record';
});
rec.start();
btn.textContent = '⏹ Stop (Shift=auto-send)';
} catch (e) {
console.error(e);
// Fix: release the mic if the stream was acquired but setup failed —
// the original leaked it, leaving the recording indicator on.
if (stream) stream.getTracks().forEach(t => t.stop());
rec = null; // don't leave a half-initialized recorder behind
alert('Mic permission denied or unavailable.');
}
} else {
stoppingEvent = ev;
btn.disabled = true;
btn.textContent = '…processing';
rec.stop(); // triggers onstop → doTranscribe
}
});
}
/**
 * POST recorded audio to the Whisper server and place the transcript into
 * the chat input; falls back to clipboard/alert when no input is found.
 * Handles two server flavors: whisper-asr-webservice-style `/asr` endpoints
 * (query params + `audio_file` field) and generic `/transcribe` endpoints.
 * @param {HTMLButtonElement} btn - record button (reset by the caller's finally).
 * @param {HTMLElement} inputFallback - input to use if detection fails now.
 * @param {Blob[]} chunks - recorded audio chunks.
 * @param {MouseEvent|null} ev - the stopping click; shiftKey triggers auto-send.
 */
async function doTranscribe(btn, inputFallback, chunks, ev) {
try {
const blob = new Blob(chunks, { type: 'audio/webm' });
const isAsr = /\/asr(\?|$)/.test(ASR_URL);
const form = new FormData();
let url = ASR_URL;
if (isAsr) {
url += (ASR_URL.includes('?') ? '&' : '?') + 'task=transcribe&language=en&output=json';
form.append('audio_file', blob, 'recording.webm');
} else {
if (!/\/transcribe(\?|$)/.test(ASR_URL)) url = ASR_URL.replace(/\/$/, '') + '/transcribe';
form.append('file', blob, 'recording.webm');
}
const res = await fetch(url, { method: 'POST', body: form });
if (!res.ok) throw new Error(`ASR ${res.status} ${res.statusText}`);
const raw = await res.text();
let parsed; try { parsed = JSON.parse(raw); } catch { parsed = { text: raw }; }
// Tolerate the response shapes of common Whisper servers.
let text = '';
if (typeof parsed === 'string') text = parsed;
else if (parsed && typeof parsed.text === 'string') text = parsed.text;
else if (parsed && typeof parsed.transcription === 'string') text = parsed.transcription;
else if (Array.isArray(parsed) && parsed[0]?.text) text = parsed[0].text;
text = (text || '').trim();
const found = findChatInput();
const target = (found && found.el) || inputFallback || document.activeElement;
if (target && text) {
const current = getValue(target); // read once, not twice as before
const prefix = (current.endsWith(' ') || current === '') ? '' : ' ';
insertAtCursor(target, prefix + text);
if (ev && ev.shiftKey) trySend(target);
} else if (text) {
// Fix: clipboard.writeText can reject (e.g. document not focused); the
// original let that rejection reach the outer catch and misreported it
// as a transcription failure. Fall back to showing the text instead.
try {
await navigator.clipboard.writeText(text);
alert('Transcript copied to clipboard (paste into AnythingLLM input).');
} catch {
alert('Transcribed text:\n\n' + text);
}
} else {
alert('Transcribed text:\n\n' + text);
}
} catch (e) {
console.error('ASR error:', e);
alert('Transcription failed: ' + e.message + '\n(Check CORS and ASR_URL.)');
}
}
// Insert text at the caret position of whatever kind of input `el` is:
// textarea/input (splice around the selection), contenteditable, or other.
function insertAtCursor(el, text) {
el.focus();
if (el instanceof HTMLTextAreaElement || el instanceof HTMLInputElement) {
const current = getValue(el);
const start = el.selectionStart ?? current.length;
const end = el.selectionEnd ?? current.length;
setReactValue(el, current.slice(0, start) + text + current.slice(end));
// Move the caret to just after the inserted text.
const caret = start + text.length;
el.selectionStart = caret;
el.selectionEnd = caret;
} else if (el.isContentEditable) {
insertIntoContentEditable(el, text);
} else {
setReactValue(el, (getValue(el) || '') + text);
}
}
// Best-effort message submit: synthesize an Enter keydown on the input,
// then click the first button whose label looks like a send action.
function trySend(input) {
input.dispatchEvent(new KeyboardEvent('keydown', {
bubbles: true, cancelable: true, key: 'Enter', code: 'Enter'
}));
const sendPattern = /send|ask|submit|enter/i;
for (const b of document.querySelectorAll('button, [role=button]')) {
const label = b.textContent || b.getAttribute('aria-label') || '';
if (sendPattern.test(label)) {
b.click();
break;
}
}
}
// Re-run injection whenever the SPA re-renders its DOM, plus two fallbacks
// for the initial render: the window load event and a delayed retry.
const obs = new MutationObserver(() => ensureButton());
obs.observe(document.documentElement, { childList: true, subtree: true });
window.addEventListener('load', ensureButton);
setTimeout(ensureButton, 1200);
// Console helper: window.whisperDebugInput() logs and returns the element
// currently detected as the chat input (see Usage notes below).
window.whisperDebugInput = () => {
const found = findChatInput();
console.log('whisperDebugInput →', found?.el, 'in doc', found?.doc?.location?.href);
return found?.el || null;
};
})();
✅ Usage
- Click 🎤 Record to start dictating.
- Click again to stop. Your spoken words are transcribed and inserted into the chat input.
- Hold Shift while stopping to automatically send the message.
You can modify the ASR_URL at the top of the script to point to any Whisper-compatible transcription service. Use the console command window.whisperDebugInput() to verify which input field the script detects.
