Commit 80f76637 authored by Jonas Herzig's avatar Jonas Herzig
Browse files

Add voice activity detection

parent c49dabbf
......@@ -11,7 +11,7 @@ Instead Websockets are used for all communications.
libopus, libcelt (0.7.1) and libsamplerate, compiled to JS via emscripten, are used for audio decoding.
Therefore, at the moment only the Opus and CELT Alpha codecs are supported.
Quite a few features, most noticeably voice activity detection and all
Quite a few features, most noticeably all
administrative functionality, are still missing.
### Installing
......
......@@ -67,11 +67,23 @@
<td>
<select data-bind='value: voiceMode'>
<option value="cont">Continuous</option>
<option value="vad" disabled>Voice Activity</option>
<option value="vad">Voice Activity</option>
<option value="ptt">Push To Talk</option>
</td>
</tr>
<tr data-bind="style: {visibility: voiceMode() == 'ptt' ? 'visible' : 'hidden'}">
<!-- Voice-activity settings row: only visible while the selected voice mode is 'vad'. -->
<tr data-bind="visible: voiceMode() == 'vad'">
<td colspan="2">
<!-- Level meter: the bar width follows testVadLevel() (multiplied by 100 to form a
     percentage); it is green while testVadActive() is true and red otherwise. -->
<div class="mic-volume-container">
<div class="mic-volume" data-bind="style: {
width: testVadLevel()*100 + '%',
background: testVadActive() ? 'green' : 'red'
}"></div>
</div>
<!-- Threshold slider (0 to 1 in steps of 0.01) bound to the vadLevel observable. -->
<input type="range" min="0" max="1" step="0.01"
data-bind="value: vadLevel">
</td>
</tr>
<tr data-bind="visible: voiceMode() == 'ptt'">
<td>PTT Key</td>
<td>
<input type="button" data-bind="value: pttKeyDisplay, click: recordPttKey">
......
......@@ -9,7 +9,7 @@ import ko from 'knockout'
import _dompurify from 'dompurify'
import keyboardjs from 'keyboardjs'
import { ContinuousVoiceHandler, PushToTalkVoiceHandler, initVoice } from './voice'
import { ContinuousVoiceHandler, PushToTalkVoiceHandler, VADVoiceHandler, initVoice } from './voice'
const dompurify = _dompurify(window)
......@@ -58,11 +58,34 @@ class SettingsDialog {
this.voiceMode = ko.observable(settings.voiceMode)
this.pttKey = ko.observable(settings.pttKey)
this.pttKeyDisplay = ko.observable(settings.pttKey)
this.vadLevel = ko.observable(settings.vadLevel)
this.testVadLevel = ko.observable(0)
this.testVadActive = ko.observable(false)
this._setupTestVad()
this.vadLevel.subscribe(() => this._setupTestVad())
}
_setupTestVad () {
if (this._testVad) {
this._testVad.end()
}
this._testVad = new VADVoiceHandler(null, this.vadLevel())
this._testVad.on('started_talking', () => this.testVadActive(true))
.on('stopped_talking', () => this.testVadActive(false))
.on('level', level => this.testVadLevel(level))
testVoiceHandler = this._testVad
}
applyTo (settings) {
settings.voiceMode = this.voiceMode()
settings.pttKey = this.pttKey()
settings.vadLevel = this.vadLevel()
}
end () {
this._testVad.end()
testVoiceHandler = null
}
recordPttKey () {
......@@ -89,14 +112,16 @@ class SettingsDialog {
/**
 * Voice settings persisted in `window.localStorage` under the `mumble.` key
 * prefix.
 *
 * Properties:
 *  - voiceMode: string, defaults to 'vad'
 *  - pttKey: string, defaults to 'ctrl + shift'
 *  - vadLevel: number, defaults to 0.3
 */
class Settings {
  /**
   * Loads every setting from localStorage, falling back to its default when
   * nothing usable is stored.
   */
  constructor () {
    const load = key => window.localStorage.getItem('mumble.' + key)
    this.voiceMode = load('voiceMode') || 'vad'
    this.pttKey = load('pttKey') || 'ctrl + shift'
    // localStorage only hands back strings (or null). Parse the level so that
    // vadLevel is a number whether it was stored or defaulted; a missing or
    // unparsable entry falls back to the default.
    const storedVadLevel = Number.parseFloat(load('vadLevel'))
    this.vadLevel = Number.isFinite(storedVadLevel) ? storedVadLevel : 0.3
  }

  /**
   * Writes every setting back to localStorage.
   */
  save () {
    const save = (key, val) => window.localStorage.setItem('mumble.' + key, val)
    save('voiceMode', this.voiceMode)
    save('pttKey', this.pttKey)
    save('vadLevel', this.vadLevel)
  }
}
......@@ -130,10 +155,13 @@ class GlobalBindings {
this._updateVoiceHandler()
this.settings.save()
this.settingsDialog(null)
this.closeSettings()
}
// Closes the settings dialog: if one is currently open it is ended first,
// then the settingsDialog observable is reset to null.
this.closeSettings = () => {
  const openDialog = this.settingsDialog()
  if (openDialog) {
    openDialog.end()
  }
  this.settingsDialog(null)
}
......@@ -360,7 +388,7 @@ class GlobalBindings {
} else if (mode === 'ptt') {
voiceHandler = new PushToTalkVoiceHandler(this.client, this.settings.pttKey)
} else if (mode === 'vad') {
voiceHandler = new VADVoiceHandler(this.client, this.settings.vadLevel)
} else {
log('Unknown voice mode:', mode)
return
......@@ -586,15 +614,19 @@ function userToState () {
}
var voiceHandler
var testVoiceHandler
initVoice(data => {
if (testVoiceHandler) {
testVoiceHandler.write(data)
}
if (!ui.client) {
if (voiceHandler) {
voiceHandler.end()
}
voiceHandler = null
} else if (voiceHandler) {
voiceHandler.write(new Float32Array(data.buffer, data.byteOffset, data.byteLength / 4))
voiceHandler.write(data)
}
}, err => {
log('Cannot initialize user media. Microphone will not work:', err)
......
import { Writable } from 'stream'
import { Writable, Transform } from 'stream'
import MicrophoneStream from 'microphone-stream'
import audioContext from 'audio-context'
import chunker from 'stream-chunker'
import Resampler from 'libsamplerate.js'
import getUserMedia from 'getusermedia'
import keyboardjs from 'keyboardjs'
import vad from 'voice-activity-detection'
import DropStream from 'drop-stream'
class VoiceHandler extends Writable {
constructor (client) {
......@@ -15,7 +17,29 @@ class VoiceHandler extends Writable {
_getOrCreateOutbound () {
if (!this._outbound) {
this._outbound = this._client.createVoiceStream()
if (!this._client) {
this._outbound = DropStream.obj()
this.emit('started_talking')
return this._outbound
}
this._outbound = new Resampler({
unsafe: true,
type: Resampler.Type.SINC_FASTEST,
ratio: 48000 / audioContext.sampleRate
})
const buffer2Float32Array = new Transform({
transform (data, _, callback) {
callback(null, new Float32Array(data.buffer, data.byteOffset, data.byteLength / 4))
},
readableObjectMode: true
})
this._outbound
.pipe(chunker(4 * 480))
.pipe(buffer2Float32Array)
.pipe(this._client.createVoiceStream())
this.emit('started_talking')
}
return this._outbound
......@@ -74,24 +98,76 @@ export class PushToTalkVoiceHandler extends VoiceHandler {
}
}
export function initVoice (onData, onUserMediaError) {
var resampler = new Resampler({
unsafe: true,
type: Resampler.Type.SINC_FASTEST,
ratio: 48000 / audioContext.sampleRate
})
export class VADVoiceHandler extends VoiceHandler {
constructor (client, level) {
super(client)
const self = this
this._vad = vad(audioContext, theUserMedia, {
onVoiceStart () {
console.log('vad: start')
self._active = true
},
onVoiceStop () {
console.log('vad: stop')
self._stopOutbound()
self._active = false
},
onUpdate (val) {
self._level = val
self.emit('level', val)
},
noiseCaptureDuration: 0,
minNoiseLevel: level,
maxNoiseLevel: level
})
// Need to keep a backlog of the last ~150ms (dependent on sample rate)
// because VAD will activate with ~125ms delay
this._backlog = []
this._backlogLength = 0
this._backlogLengthMin = 1024 * 6 * 4 // vadBufferLen * (vadDelay + 1) * bytesPerSample
}
resampler.pipe(chunker(4 * 480)).on('data', data => {
onData(data)
})
_write (data, _, callback) {
if (this._active) {
if (this._backlog.length > 0) {
for (let oldData of this._backlog) {
this._getOrCreateOutbound().write(oldData)
}
this._backlog = []
this._backlogLength = 0
}
this._getOrCreateOutbound().write(data, callback)
} else {
// Make sure we always keep the backlog filled if we're not (yet) talking
this._backlog.push(data)
this._backlogLength += data.length
// Check if we can discard the oldest element without becoming too short
if (this._backlogLength - this._backlog[0].length > this._backlogLengthMin) {
this._backlogLength -= this._backlog.shift().length
}
callback()
}
}
_final (callback) {
super._final(e => {
this._vad.destroy()
callback(e)
})
}
}
var theUserMedia = null
export function initVoice (onData, onUserMediaError) {
getUserMedia({ audio: true }, (err, userMedia) => {
if (err) {
onUserMediaError(err)
} else {
var micStream = new MicrophoneStream(userMedia, { objectMode: true })
theUserMedia = userMedia
var micStream = new MicrophoneStream(userMedia, { objectMode: true, bufferSize: 1024 })
micStream.on('data', data => {
resampler.write(Buffer.from(data.getChannelData(0).buffer))
onData(Buffer.from(data.getChannelData(0).buffer))
})
}
})
......
......@@ -208,8 +208,10 @@ form {
border-bottom: 1px solid darkgray;
}
/* Footer pinned to the bottom edge of its positioned ancestor; the width
   leaves room for the 10px side margins. */
.dialog-footer {
  position: absolute;
  bottom: 0;
  width: calc(100% - 20px);
  margin: 10px 10px 0;
}
.dialog-close {
float: left;
......@@ -255,6 +257,14 @@ form {
}
/* Inputs inside the settings table fill their cell without extra margin. */
.settings-dialog table input {
  margin: 0;
  width: 100%;
}
/* Frame of the microphone level meter. */
.settings-dialog .mic-volume-container {
  border: 3px solid black;
  height: 10px;
}
/* Level bar inside the frame; it spans the frame's full height. */
.settings-dialog .mic-volume {
  height: 100%;
}
.connect-dialog {
width: 300px;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment