Ai_Assistant/client/audioManager.js
2026-06-10 21:40:08 +02:00

163 lines
5.5 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

export class AudioManager {
  /**
   * Plays audio through a Web Audio analyser and drives a VRM model's
   * emotion expression and mouth shapes from the analysed signal.
   *
   * @param vrm - VRM model with expressionManager
   * @param options - optional config overrides:
   *   smoothFactor (0-1), ampThreshold, ampScale,
   *   centroidThresholds: { wide, ih, oh }
   */
  constructor(vrm, options = {}) {
    this.vrm = vrm;
    this.currentExpression = "neutral";
    this.audioElement = null;
    this.audioContext = null;
    this.analyser = null;
    this.timeDomainData = null;
    this.freqData = null;
    // Last smoothed value written for each mouth shape.
    this.prevValues = { aa: 0, ee: 0, ih: 0, oh: 0, ou: 0 };

    // Default parameters.
    const defaults = {
      smoothFactor: 0.2,
      ampThreshold: 0.005,
      ampScale: 1, // lower this value if the mouth opens too wide
      centroidThresholds: { wide: 0.75, ih: 0.5, oh: 0.25 },
    };

    // Merge options. The nested thresholds are merged key by key so that a
    // partial (or explicitly undefined) `centroidThresholds` never removes a
    // default threshold.
    const { centroidThresholds, ...overrides } = options;
    this.config = {
      ...defaults,
      ...overrides,
      centroidThresholds: { ...defaults.centroidThresholds, ...centroidThresholds },
    };

    this.allExpressions = ["happy", "angry", "sad", "relaxed", "surprised", "neutral"];
  }

  /**
   * Update config at runtime.
   *
   * Top-level keys are overwritten; `centroidThresholds` is merged key by key
   * into the current thresholds, so a partial object keeps the other
   * thresholds intact. The caller's object is copied, never stored.
   *
   * @param newConfig - partial config, same shape as the constructor options
   */
  updateConfig(newConfig = {}) {
    const { centroidThresholds, ...overrides } = newConfig;
    Object.assign(this.config, overrides);
    if (centroidThresholds) {
      this.config.centroidThresholds = {
        ...this.config.centroidThresholds,
        ...centroidThresholds,
      };
    }
  }

  /**
   * Switch the active emotion expression.
   *
   * Every known expression that exists on the model is reset to 0 first. If
   * `expr` exists on the model it is then set to 1.0 and remembered in
   * `currentExpression`; otherwise `currentExpression` is left unchanged.
   *
   * @param expr - expression name to activate
   */
  setExpression(expr) {
    const manager = this.vrm.expressionManager;
    for (const name of this.allExpressions) {
      if (manager.getExpression(name)) {
        manager.setValue(name, 0);
      }
    }
    if (manager.getExpression(expr)) {
      manager.setValue(expr, 1.0);
      this.currentExpression = expr;
    }
  }

  /**
   * Load an audio URL into the shared audio element and wait until it can be
   * played.
   *
   * The AudioContext, analyser and audio element are created on the first
   * call only and reused afterwards.
   *
   * @param url - URL of the audio file to load
   * @returns Promise that resolves once the audio element is ready
   * @throws Error when the audio element reports a loading error
   */
  async setupAudio(url) {
    // 1. Create the AudioContext and analyser exactly once.
    if (!this.audioContext) {
      this.audioContext = new (window.AudioContext || window.webkitAudioContext)();
      this.analyser = this.audioContext.createAnalyser();
      this.analyser.fftSize = 2048;
      this.analyser.connect(this.audioContext.destination);
      this.timeDomainData = new Uint8Array(this.analyser.fftSize);
      this.freqData = new Uint8Array(this.analyser.frequencyBinCount);
    }

    // 2. Create the audio element exactly once and route it through the analyser.
    if (!this.audioElement) {
      this.audioElement = new Audio();
      this.audioElement.crossOrigin = "anonymous";
      const source = this.audioContext.createMediaElementSource(this.audioElement);
      source.connect(this.analyser);
    }

    // 3. Load the new audio URL.
    const audio = this.audioElement;
    audio.src = url;
    audio.load();

    // 4. Wait until the element is ready, so callers never hang silently.
    await new Promise((resolve, reject) => {
      // Already loaded (e.g. served from cache): nothing to wait for.
      if (audio.readyState >= 3) {
        resolve();
        return;
      }

      // Remove BOTH handlers once the promise settles, so neither one lingers
      // on the shared element after this load has finished.
      const detachHandlers = () => {
        audio.oncanplaythrough = null;
        audio.onerror = null;
      };

      audio.oncanplaythrough = () => {
        detachHandlers();
        resolve();
      };
      audio.onerror = () => {
        detachHandlers();
        reject(new Error("Fehler beim Laden der Audiodatei."));
      };
    });
  }

  /**
   * Make sure the AudioContext is running so the analyser receives data.
   * Resumes the context if it is currently suspended.
   *
   * @throws Error when setupAudio() has not been called yet
   */
  async analyzeAudio() {
    if (!this.audioContext) {
      throw new Error("Call setupAudio() before analyzeAudio().");
    }
    if (this.audioContext.state === 'suspended') {
      await this.audioContext.resume();
    }
  }

  /**
   * Set all mouth shapes to 0 on the model (where they exist) and clear the
   * smoothing history.
   */
  resetMouth() {
    const manager = this.vrm.expressionManager;
    for (const shape of Object.keys(this.prevValues)) {
      if (manager.getExpression(shape)) {
        manager.setValue(shape, 0);
      }
      this.prevValues[shape] = 0;
    }
  }

  /**
   * Call each frame (time arg ignored).
   *
   * Reads the analyser, derives the RMS amplitude and the normalised spectral
   * centroid, maps them to mouth shapes and applies the smoothed values to
   * the model. Does nothing until setupAudio() has created the analyser.
   */
  updateLipSync(time) {
    if (!this.analyser) return;

    this.analyser.getByteTimeDomainData(this.timeDomainData);
    this.analyser.getByteFrequencyData(this.freqData);

    // RMS amplitude: byte samples are centred on 128, rescale to [-1, 1).
    let sumSq = 0;
    for (let i = 0; i < this.timeDomainData.length; i++) {
      const v = (this.timeDomainData[i] - 128) / 128;
      sumSq += v * v;
    }
    const rms = Math.sqrt(sumSq / this.timeDomainData.length);

    // Spectral centroid, normalised by the bin count (0-1).
    let weightedSum = 0;
    let magSum = 0;
    for (let i = 0; i < this.freqData.length; i++) {
      const mag = this.freqData[i];
      weightedSum += i * mag;
      magSum += mag;
    }
    const centroid = magSum > 0 ? (weightedSum / magSum) / this.freqData.length : 0;

    // Map to phoneme shapes with config: `aa` follows the loudness, and the
    // centroid picks exactly one additional shape.
    const { ampThreshold, ampScale, centroidThresholds, smoothFactor } = this.config;
    const shapes = { aa: 0, ee: 0, ih: 0, oh: 0, ou: 0 };
    if (rms > ampThreshold) {
      const level = Math.min(1, (rms - ampThreshold) * ampScale);
      shapes.aa = level;
      if (centroid > centroidThresholds.wide) shapes.ee = level;
      else if (centroid > centroidThresholds.ih) shapes.ih = level;
      else if (centroid > centroidThresholds.oh) shapes.oh = level;
      else shapes.ou = level;
    }

    // Smoothing + apply: move each shape a fraction of the way to its target.
    for (const k of Object.keys(shapes)) {
      const target = shapes[k];
      const smooth = this.prevValues[k] + smoothFactor * (target - this.prevValues[k]);
      this.prevValues[k] = smooth;
      if (this.vrm.expressionManager.getExpression(k)) {
        this.vrm.expressionManager.setValue(k, smooth);
      }
    }
  }
}