Files
archived-vdo.ninja/examples/googleai.html
2025-05-09 03:02:30 -04:00

965 lines
36 KiB
HTML

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Search-engine metadata: title, summary, keywords and crawler policy. -->
<title>Gemini Vision Chat - Live AI Video Conversations</title>
<meta name="description" content="Experience real-time AI video conversations with Google's Gemini Vision AI. This interactive demo showcases live video analysis and natural language processing capabilities.">
<meta name="keywords" content="Gemini AI, video chat, AI assistant, Google AI, computer vision, real-time AI">
<meta name="robots" content="index, follow">
<!-- Open Graph tags (title, description, type). -->
<meta property="og:title" content="Gemini Vision Chat">
<meta property="og:description" content="Live video conversations with Google's Gemini Vision AI">
<meta property="og:type" content="website">
<!-- Authorship: author name plus links to the author's GitHub profile. -->
<meta name="author" content="Steve Seguin">
<link rel="me" href="https://github.com/steveseguin">
<meta property="article:author" content="https://github.com/steveseguin">
<link rel="icon" type="image/svg+xml" href="data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCA2NCA2NCI+PGRlZnM+PGxpbmVhckdyYWRpZW50IGlkPSJnMSIgeDE9IjAlIiB5MT0iMCUiIHgyPSIxMDAlIiB5Mj0iMTAwJSI+PHN0b3Agb2Zmc2V0PSIwJSIgc3R5bGU9InN0b3AtY29sb3I6IzQwNEVFRCIvPjxzdG9wIG9mZnNldD0iMTAwJSIgc3R5bGU9InN0b3AtY29sb3I6IzU4NjVGMiIvPjwvbGluZWFyR3JhZGllbnQ+PC9kZWZzPjxwYXRoIGQ9Ik04IDhoNDh2MzhIMjJMOCA1NlY4eiIgZmlsbD0idXJsKCNnMSkiLz48cGF0aCBkPSJNMjAgMjhoMjRNMjAgMjBoMjRNMjAgMzZoMTYiIHN0cm9rZT0iI2ZmZiIgc3Ryb2tlLXdpZHRoPSI0IiBzdHJva2UtbGluZWNhcD0icm91bmQiLz48Y2lyY2xlIGN4PSI0OCIgY3k9IjM2IiByPSIzIiBmaWxsPSIjZmZmIi8+PC9zdmc+">
<style>
  /* ---- Page layout: two equal columns filling the viewport ---- */
  body {
    margin: 0;
    padding: 20px;
    display: flex;
    height: 100vh;
    box-sizing: border-box;
    font-family: system-ui, -apple-system, sans-serif;
    background: #1a1a1a;
    color: #e0e0e0;
    position: relative;
  }
  .github-link {
    position: fixed;
    bottom: 15px;
    left: 15px;
    opacity: 0.7;
    transition: opacity 0.2s;
  }
  .github-link:hover {
    opacity: 1;
  }
  p {
    display: inline-block;
  }
  .left-panel {
    width: 50%;
    padding-right: 20px;
  }
  .right-panel {
    width: 50%;
    display: flex;
    flex-direction: column;
    height: 100%;
  }

  /* ---- Left panel: device controls, error line and camera preview ---- */
  .controls {
    margin-bottom: 20px;
    display: flex;
    gap: 10px;
    flex-wrap: wrap;
  }
  .preview {
    width: 100%;
    max-height: calc(100vh - 200px);
    object-fit: contain;
    border-radius: 12px;
    background: #2a2a2a;
  }
  #error {
    color: #ff6b6b;
    margin: 10px 0;
  }

  /* ---- Shared form-control look ---- */
  select, button, .api-key, .message-input {
    background: #2a2a2a;
    border: 1px solid #404040;
    color: #e0e0e0;
    padding: 8px 12px;
    border-radius: 8px;
    font-size: 14px;
    transition: all 0.2s ease;
  }
  /* Explicit keyboard-focus indicator so focus stays visible on the dark theme. */
  select:focus-visible,
  button:focus-visible,
  .api-key:focus-visible,
  .message-input:focus-visible,
  .github-link:focus-visible {
    outline: 2px solid #5865f2;
    outline-offset: 2px;
  }
  select:hover, button:hover {
    background: #333;
    border-color: #505050;
  }
  button {
    cursor: pointer;
    background: #404eed;
    border: none;
    font-weight: 500;
  }
  button:hover {
    background: #5865f2;
  }

  /* ---- Start/stop button: green and gently pulsing while enabled ---- */
  #startButton {
    background: #22c55e;
    font-size: 16px;
    padding: 10px 20px;
    font-weight: 600;
    animation: pulse 2s infinite;
  }
  #startButton:hover {
    background: #16a34a;
  }
  @keyframes pulse {
    0% { transform: scale(1); }
    50% { transform: scale(1.05); }
    100% { transform: scale(1); }
  }

  /* ---- API key field ---- */
  /* .highlight is added briefly by the script when the key is missing;
     the red box-shadow replaces the outline it removes. */
  .api-key.highlight {
    border-color: #ff6b6b;
    outline: none;
    box-shadow: 0 0 0 2px rgba(255, 107, 107, 0.3);
  }
  .api-key-container {
    display: flex;
    flex-direction: row;
    gap: 8px;
  }
  .api-key-info {
    font-size: 13px;
    color: #a0a0a0;
    margin: auto;
  }
  .api-key-info a {
    color: #5865f2;
    text-decoration: none;
  }
  .api-key-info a:hover {
    text-decoration: underline;
  }
  /* A disabled button should not keep pulsing for attention. */
  #startButton:disabled {
    opacity: 0.5;
    cursor: not-allowed;
    background: #2a2a2a;
    animation: none;
  }
  /* Respect the user's reduced-motion preference. */
  @media (prefers-reduced-motion: reduce) {
    #startButton {
      animation: none;
    }
  }

  /* ---- Right panel: chat transcript and message composer ---- */
  .chat-container {
    display: flex;
    flex-direction: column;
    height: 100%;
    background: #2a2a2a;
    border-radius: 12px;
    overflow: hidden;
  }
  .responses {
    flex-grow: 1;
    padding: 16px;
    background: #2a2a2a;
    overflow-y: auto;
    margin-bottom: 10px;
  }
  .input-container {
    display: flex;
    gap: 10px;
    padding: 16px;
    background: #232323;
    border-top: 1px solid #404040;
  }
  .message {
    margin: 8px 0;
    padding: 12px;
    border-radius: 8px;
    line-height: 1.5;
  }
  .user-message {
    background: #404eed;
    margin-left: 20px;
    color: #fff;
  }
  .assistant-message {
    background: #333;
    margin-right: 20px;
  }

  /* ---- Rendered message content ---- */
  .markdown-content {
    white-space: pre-wrap;
    word-wrap: break-word;
  }
  .markdown-content li {
    margin-left: 20px;
    margin-bottom: 5px;
  }
  .markdown-content code {
    background: #232323;
    padding: 2px 6px;
    border-radius: 4px;
    font-family: ui-monospace, monospace;
    font-size: 0.9em;
  }

  /* ---- Transcript scrollbar (WebKit/Blink only) ---- */
  .responses::-webkit-scrollbar {
    width: 8px;
  }
  .responses::-webkit-scrollbar-track {
    background: #232323;
    border-radius: 4px;
  }
  .responses::-webkit-scrollbar-thumb {
    background: #404040;
    border-radius: 4px;
  }
  .responses::-webkit-scrollbar-thumb:hover {
    background: #505050;
  }
</style>
</head>
<body>
  <!-- Left column: capture controls, error line and local camera preview.
       Element ids and classes are looked up by the inline script; keep them stable. -->
  <div class="left-panel">
    <div class="controls">
      <select id="videoSource" aria-label="Camera"></select>
      <select id="audioSource" aria-label="Microphone"></select>
      <button id="startButton" type="button">Start Stream</button>
      <select id="responseType" aria-label="Response type">
        <option value="text">Text Response</option>
        <option value="audio">Audio Response</option>
      </select>
      <!-- Visibility is toggled by the script via style.display when the response type changes. -->
      <select id="voiceSelect" aria-label="Assistant voice" style="display: none;">
        <option value="Aoede">Female Voice 1 (Aoede)</option>
        <option value="Kore">Female Voice 2 (Kore)</option>
        <option value="Puck">Male Voice 1 (Puck)</option>
        <option value="Charon">Male Voice 2 (Charon)</option>
        <option value="Fenrir">Male Voice 3 (Fenrir)</option>
      </select>
      <div class="api-key-container">
        <input type="password" class="api-key" id="apiKey" placeholder="Enter Gemini API Key" size="15" autocomplete="off" aria-label="Gemini API key">
        <div class="api-key-info">
          Get your free Gemini API key at <a href="https://aistudio.google.com/app/apikey" target="_blank" rel="noopener">Google AI Studio</a>.
        </div>
      </div>
    </div>
    <!-- role="alert" makes error text announced by assistive technology when it is injected. -->
    <div id="error" role="alert"></div>
    <video class="preview" id="preview" autoplay muted playsinline></video>
  </div>
  <!-- Right column: chat transcript and message composer. -->
  <div class="right-panel">
    <div class="chat-container">
      <div id="responses" class="responses"></div>
      <div class="input-container">
        <input type="text" class="message-input" placeholder="Type a message..." aria-label="Message">
        <button id="sendButton" type="button">Send</button>
      </div>
    </div>
  </div>
  <a href="https://github.com/steveseguin/gemini-chatbot" class="github-link" target="_blank" rel="noopener noreferrer" title="Fork on GitHub (MIT License)" aria-label="Fork on GitHub (MIT License)">
    <svg width="24" height="24" viewBox="0 0 24 24" fill="#e0e0e0" aria-hidden="true">
      <path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/>
    </svg>
  </a>
<script>
/**
 * Publishes a local MediaStream (microphone + camera) to the Gemini
 * BidiGenerateContent WebSocket endpoint and relays the model's replies.
 *
 * Text parts of a reply are re-dispatched on `window` as a `modelResponse`
 * CustomEvent (`detail.text`); audio parts are handed to an AudioPlayer.
 */
class GoogleLivePublisher {
  /**
   * @param {MediaStream} stream  Captured audio/video stream to publish.
   * @param {string} apiKey       Gemini API key, sent as the `key` query parameter.
   */
  constructor(stream, apiKey) {
    this.stream = stream;
    this.apiKey = apiKey;
    this.ws = null;
    this.audioContext = null;
    this.videoProcessor = null;
    this.canvasContext = null;
    this.lastImageTime = 0;
    this.imageInterval = 200; // minimum milliseconds between uploaded video frames
    this.imageWidth = 640;
    this.imageHeight = 360;
    this.stopped = false; // set by stop(); ends the frame-capture loop
    this.turnHasAudio = false; // true once the current model turn has produced audio
    this.handleMessage = this.handleMessage.bind(this);
    this.audioPlayer = new AudioPlayer();
  }

  /**
   * WebSocket `message` handler. Parses the JSON payload (string or Blob) and
   * reacts to setup completion, model text/audio parts and turn completion.
   * Errors are logged rather than thrown so one bad frame does not kill the socket.
   * @param {MessageEvent} event
   */
  async handleMessage(event) {
    try {
      let response;
      if (event.data instanceof Blob) {
        response = JSON.parse(await event.data.text());
      } else {
        response = JSON.parse(event.data);
      }

      if (response.setupComplete) {
        console.log('Setup complete received');
        this.sendPrompt("Hi, introduce yourself in a sentence for me. Be friendly to me.");
      }

      const serverContent = response.serverContent;
      const parts = serverContent?.modelTurn?.parts;
      if (parts) {
        parts.forEach(part => {
          if (part.text) {
            console.log('Model response:', part.text);
            const responseEvent = new CustomEvent('modelResponse', {
              detail: {
                text: part.text
              }
            });
            window.dispatchEvent(responseEvent);
          }
          if (part.inlineData?.mimeType?.startsWith('audio/')) {
            this.turnHasAudio = true;
            console.log('Received audio response with mime type:', part.inlineData.mimeType);
            try {
              // Honour the sample rate announced in the mime type
              // (e.g. "audio/pcm;rate=24000"); fall back to 24 kHz.
              const rateMatch = part.inlineData.mimeType.match(/rate=(\d+)/);
              const sampleRate = rateMatch ? parseInt(rateMatch[1], 10) : 24000;
              this.audioPlayer.sampleRate = sampleRate;
              this.audioPlayer.resume();
              const audioData = base64ToArrayBuffer(part.inlineData.data);
              console.log('Processing audio chunk of size:', audioData.byteLength);
              this.audioPlayer.addPCM16(new Uint8Array(audioData));
            } catch (err) {
              console.error('Error processing audio:', err);
            }
          }
        });
      }

      // turnComplete may arrive in a message that carries no modelTurn parts,
      // so it is checked independently of the parts branch above.
      if (serverContent?.turnComplete) {
        if (this.turnHasAudio) {
          console.log('Turn complete, finalizing audio');
          this.audioPlayer.complete();
        }
        this.turnHasAudio = false;
      }

      if (!response.setupComplete && !response.serverContent) {
        console.log('Other response type:', response);
      }
    } catch (err) {
      console.error('Error handling message:', err);
    }
  }

  /**
   * Sends a user text turn, reconnecting first when the socket is not open.
   * @param {string} text
   * @returns {Promise<void>|undefined} A promise only on the reconnect path;
   *   it rejects when reconnecting fails so an awaiting caller can report it.
   */
  sendPrompt(text) {
    if (!this.isConnected()) {
      console.error('WebSocket not connected, attempting reconnect...');
      return this.connect().then(() => {
        this._sendPromptInternal(text);
      });
    }
    this._sendPromptInternal(text);
  }

  /**
   * Serialises and sends a single complete user turn; logs if the socket is not open.
   * @param {string} text
   */
  _sendPromptInternal(text) {
    if (this.isConnected()) {
      const message = {
        clientContent: {
          turns: [{
            role: "user",
            parts: [{
              text
            }]
          }],
          turnComplete: true
        }
      };
      console.log('Sending prompt:', message);
      this.ws.send(JSON.stringify(message));
    } else {
      console.error('WebSocket still not ready after reconnect attempt');
    }
  }

  /** @returns {boolean} Whether the WebSocket exists and is open. */
  isConnected() {
    return this.ws && this.ws.readyState === WebSocket.OPEN;
  }

  /**
   * Opens the WebSocket (no-op when already open) and sends the setup message.
   * Response modality and voice are read from the #responseType / #voiceSelect
   * elements at connect time.
   * @throws {Error} When the socket fails to open.
   */
  async connect() {
    if (this.isConnected()) {
      console.log('Already connected');
      return;
    }
    const host = 'generativelanguage.googleapis.com';
    // Encode the key so stray characters cannot corrupt the query string.
    const uri = `wss://${host}/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent?key=${encodeURIComponent(this.apiKey)}`;
    const responseType = document.getElementById('responseType');
    const voiceSelect = document.getElementById('voiceSelect');
    voiceSelect.style.display = responseType.value === 'audio' ? 'block' : 'none';

    const socket = new WebSocket(uri);
    this.ws = socket;
    socket.onmessage = this.handleMessage;
    socket.onerror = (error) => {
      console.error('WebSocket error:', error);
    };
    socket.onclose = (event) => {
      console.log('WebSocket closed:', event.code, event.reason);
    };
    await new Promise((resolve, reject) => {
      socket.addEventListener('open', resolve, {
        once: true
      });
      // The `error` event carries no message; reject with a real Error so
      // callers that display err.message show something meaningful.
      socket.addEventListener('error', () => reject(new Error('WebSocket connection failed')), {
        once: true
      });
    });

    const wantsAudio = responseType.value === 'audio';
    const setupMessage = {
      setup: {
        model: "models/gemini-2.0-flash-exp",
        systemInstruction: {
          parts: [{
            text: "You are a friendly and helpful social chat assistant that can see and hear the user."
          }]
        },
        generationConfig: {
          temperature: 0.9,
          topK: 1,
          topP: 1,
          candidateCount: 1,
          responseModalities: wantsAudio ? 'AUDIO' : 'TEXT',
          ...(wantsAudio && {
            speechConfig: {
              voiceConfig: {
                prebuiltVoiceConfig: {
                  voiceName: voiceSelect.value
                }
              }
            }
          })
        }
      }
    };
    console.log('Sending setup message:', setupMessage);
    socket.send(JSON.stringify(setupMessage));
  }

  /**
   * Connects, then starts microphone and camera forwarding.
   * On any failure the publisher is stopped and the error is rethrown.
   */
  async start() {
    try {
      await this.connect();
      await this.setupAudioProcessing();
      this.setupVideoProcessing();
    } catch (err) {
      console.error('Failed to start:', err);
      this.stop();
      throw err;
    }
  }

  /**
   * Routes the stream's audio through an AudioWorklet in a 16 kHz AudioContext
   * and forwards each Int16 buffer it posts as base64 "audio/pcm;rate=16000".
   */
  async setupAudioProcessing() {
    this.audioContext = new AudioContext({
      sampleRate: 16000
    });
    // The worklet source lives in a string; load it through a temporary blob URL.
    const workletBlob = new Blob([`registerProcessor('audio-processor', ${AudioProcessingWorklet})`], {
      type: 'application/javascript'
    });
    const workletUrl = URL.createObjectURL(workletBlob);
    await this.audioContext.audioWorklet.addModule(workletUrl);
    URL.revokeObjectURL(workletUrl);
    const source = this.audioContext.createMediaStreamSource(this.stream);
    const processor = new AudioWorkletNode(this.audioContext, 'audio-processor');
    processor.port.onmessage = (event) => {
      if (event.data.data?.int16arrayBuffer) {
        const base64Audio = btoa(String.fromCharCode(...new Uint8Array(event.data.data.int16arrayBuffer)));
        this.sendMediaChunk([{
          mime_type: "audio/pcm;rate=16000",
          data: base64Audio
        }]);
      }
    };
    source.connect(processor);
  }

  /**
   * Draws the first video track onto an off-screen canvas and uploads a JPEG
   * at most once every `imageInterval` ms until stop() is called.
   */
  setupVideoProcessing() {
    const videoTrack = this.stream.getVideoTracks()[0];
    if (!videoTrack) {
      console.warn('No video track available; video frames will not be sent');
      return;
    }
    const canvas = document.createElement('canvas');
    canvas.width = this.imageWidth;
    canvas.height = this.imageHeight;
    this.canvasContext = canvas.getContext('2d');
    const videoElement = document.createElement('video');
    videoElement.srcObject = new MediaStream([videoTrack]);
    videoElement.muted = true; // video-only element; muting keeps autoplay permitted
    videoElement.playsInline = true;
    videoElement.autoplay = true;
    const captureFrame = () => {
      // stop() nulls canvasContext, so bail out before touching it.
      if (this.stopped || !this.canvasContext) {
        return;
      }
      const now = Date.now();
      if (now - this.lastImageTime >= this.imageInterval) {
        this.canvasContext.drawImage(videoElement, 0, 0, this.imageWidth, this.imageHeight);
        const base64Image = canvas.toDataURL('image/jpeg', 0.8).split(',')[1];
        this.sendMediaChunk([{
          mime_type: "image/jpeg",
          data: base64Image
        }]);
        this.lastImageTime = now;
      }
      requestAnimationFrame(captureFrame);
    };
    videoElement.addEventListener('loadedmetadata', () => {
      requestAnimationFrame(captureFrame);
    });
  }

  /**
   * Sends realtime media to the model; silently dropped when the socket is not open.
   * @param {Array<{mime_type: string, data: string}>} mediaChunks
   *   Chunks are forwarded unchanged, with base64 payloads in `data`.
   */
  sendMediaChunk(mediaChunks) {
    if (this.ws?.readyState === WebSocket.OPEN) {
      const message = {
        realtimeInput: {
          mediaChunks
        }
      };
      this.ws.send(JSON.stringify(message));
    }
  }

  /** Closes the socket, the capture AudioContext and the player, and clears references. */
  stop() {
    this.stopped = true;
    this.ws?.close();
    this.audioContext?.close();
    this.audioPlayer?.stop();
    this.ws = null;
    this.audioContext = null;
    this.videoProcessor = null;
    this.canvasContext = null;
  }
}
/**
 * Plays a stream of little-endian 16-bit mono PCM chunks through the Web Audio API.
 *
 * Incoming bytes are accumulated into blocks of at least `minimumBufferSize`
 * samples, padded/faded, queued, and scheduled back-to-back on the
 * AudioContext clock. When the queue drains the player pauses; it resumes as
 * soon as enough audio is queued again or complete() flushes what is left.
 */
class AudioPlayer {
  constructor() {
    this.context = new AudioContext();
    this.gainNode = this.context.createGain();
    this.gainNode.connect(this.context.destination);
    this.gainNode.gain.value = 1;
    this.bufferSize = 8192 * 4;
    this.sampleRate = 24000;
    this.processingBuffer = new Float32Array(0); // samples not yet large enough to queue
    this.audioQueue = []; // padded Float32Array blocks awaiting scheduling
    this.isPlaying = false;
    this.scheduledTime = 0; // context time at which the next block should start
    this.currentSource = null;
    this.silencePadding = 0.015; // seconds of silence added on each side of a block
    this.startDelay = 0.05;
    this.bufferTarget = 3;
    this.scheduleAheadTime = 0.2;
    this.minimumBufferSize = this.bufferSize;
    this.underrunRecoveryTime = 0.2;
    this.maxBufferSize = this.bufferSize * 8;
    this.isPaused = false; // true while waiting for data after the queue ran dry
    this.lastPlaybackTime = 0;
    this.totalScheduledDuration = 0;
    this.underrunCount = 0;
    this.lastUnderrunTime = 0;
    this.adaptiveBufferTarget = this.bufferTarget;
    this.initialChunk = true; // first playback gets `startDelay`; stop() re-arms it
    this.scheduleTimer = null; // id of the single pending scheduling timeout
  }

  /**
   * Appends raw PCM16 bytes. Once enough samples have accumulated they are
   * queued, and playback starts (or resumes after an underrun) when the queue
   * holds at least `adaptiveBufferTarget` blocks.
   * @param {Uint8Array} chunk Little-endian 16-bit samples; a trailing odd byte is ignored.
   */
  addPCM16(chunk) {
    const sampleCount = Math.floor(chunk.byteLength / 2);
    // Respect the view's offset/length: `chunk` may be a window into a larger buffer.
    const view = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
    const samples = new Float32Array(sampleCount);
    for (let i = 0; i < sampleCount; i++) {
      samples[i] = view.getInt16(i * 2, true) / 32768;
    }

    const merged = new Float32Array(this.processingBuffer.length + samples.length);
    merged.set(this.processingBuffer);
    merged.set(samples, this.processingBuffer.length);
    this.processingBuffer = merged;

    if (this.processingBuffer.length < this.minimumBufferSize) {
      return;
    }
    this.audioQueue.push(this.addSilencePadding(this.processingBuffer));
    this.processingBuffer = new Float32Array(0);

    if (this.audioQueue.length >= this.adaptiveBufferTarget) {
      if (!this.isPlaying) {
        this.isPlaying = true;
        this.scheduledTime = this.context.currentTime + (this.initialChunk ? this.startDelay : 0);
        this.initialChunk = false;
        this.scheduleNextBuffer();
      } else if (this.isPaused) {
        this._resumeAfterUnderrun();
      }
    }
  }

  /**
   * Returns a copy of `audioData` with `silencePadding` seconds of silence on
   * both sides and raised-cosine fades applied to its first and last samples.
   * @param {Float32Array} audioData
   * @returns {Float32Array}
   */
  addSilencePadding(audioData) {
    const paddingSamples = Math.floor(this.silencePadding * this.sampleRate);
    const crossfadeSamples = Math.min(paddingSamples, Math.floor(this.sampleRate * 0.015));
    const paddedBuffer = new Float32Array(audioData.length + (paddingSamples * 2));
    paddedBuffer.set(audioData, paddingSamples);
    for (let i = 0; i < crossfadeSamples; i++) {
      const fadeIn = 0.5 * (1 - Math.cos((i / crossfadeSamples) * Math.PI));
      paddedBuffer[paddingSamples + i] *= fadeIn;
    }
    for (let i = 0; i < crossfadeSamples; i++) {
      const fadeOut = 0.5 * (1 + Math.cos((i / crossfadeSamples) * Math.PI));
      paddedBuffer[paddingSamples + audioData.length - crossfadeSamples + i] *= fadeOut;
    }
    return paddedBuffer;
  }

  /** Leaves the paused-on-underrun state and restarts scheduling from "now". */
  _resumeAfterUnderrun() {
    this.isPaused = false;
    this.scheduledTime = Math.max(this.scheduledTime, this.context.currentTime);
    this.scheduleNextBuffer();
  }

  /**
   * Schedules queued blocks until `scheduleAheadTime` seconds of audio are
   * lined up, then re-arms a timer to top up later. Pauses on an empty queue.
   */
  scheduleNextBuffer() {
    if (!this.isPlaying || this.isPaused) return;
    const now = this.context.currentTime;
    if (this.audioQueue.length === 0) {
      // Underrun: wait for addPCM16()/complete() to resume playback.
      this.underrunCount++;
      this.lastUnderrunTime = Date.now();
      this.isPaused = true;
      this.lastPlaybackTime = this.scheduledTime;
      return;
    }
    while (this.audioQueue.length > 0 &&
      this.scheduledTime < now + this.scheduleAheadTime) {
      const audioData = this.audioQueue.shift();
      const audioBuffer = this.createAudioBuffer(audioData);
      const source = this.context.createBufferSource();
      source.buffer = audioBuffer;
      const startTime = Math.max(this.scheduledTime, now);
      source.connect(this.gainNode);
      const scheduleOffset = 0.005;
      source.start(startTime + scheduleOffset);
      this.currentSource = source;
      // Overlap consecutive blocks by one padding length.
      this.scheduledTime = startTime + audioBuffer.duration - this.silencePadding;
      source.onended = () => {
        if (this.audioQueue.length > 0) {
          requestAnimationFrame(() => this.scheduleNextBuffer());
        }
      };
    }
    if (this.isPlaying && !this.isPaused) {
      const nextCheckDelay = Math.max(10,
        (this.scheduledTime - this.context.currentTime) * 500
      );
      // Keep a single pending timer so repeated calls do not spawn parallel chains.
      clearTimeout(this.scheduleTimer);
      this.scheduleTimer = setTimeout(() => this.scheduleNextBuffer(), nextCheckDelay);
    }
  }

  /**
   * Wraps samples in a mono AudioBuffer at the player's sample rate.
   * @param {Float32Array} audioData
   * @returns {AudioBuffer}
   */
  createAudioBuffer(audioData) {
    const audioBuffer = this.context.createBuffer(1, audioData.length, this.sampleRate);
    audioBuffer.getChannelData(0).set(audioData);
    return audioBuffer;
  }

  /**
   * Flushes pending audio, then after 500 ms resets all playback state, fades
   * the output to silence and swaps in a fresh gain node.
   */
  stop() {
    this.complete();
    setTimeout(() => {
      clearTimeout(this.scheduleTimer);
      this.scheduleTimer = null;
      this.isPlaying = false;
      this.isPaused = false;
      if (this.currentSource) {
        try {
          this.currentSource.stop();
        } catch (e) {
          console.warn('Error stopping current source:', e);
        }
        this.currentSource = null;
      }
      this.audioQueue = [];
      this.processingBuffer = new Float32Array(0);
      this.underrunCount = 0;
      this.lastUnderrunTime = 0;
      this.adaptiveBufferTarget = this.bufferTarget;
      this.initialChunk = true;
      this.totalScheduledDuration = 0;
      this.lastPlaybackTime = 0;
      const currentTime = this.context.currentTime;
      this.gainNode.gain.setValueAtTime(this.gainNode.gain.value, currentTime);
      this.gainNode.gain.linearRampToValueAtTime(0, currentTime + 0.2);
      setTimeout(() => {
        this.gainNode.disconnect();
        this.gainNode = this.context.createGain();
        this.gainNode.connect(this.context.destination);
      }, 300);
    }, 500);
  }

  /**
   * Marks the end of an utterance: queues any partial block plus 0.2 s of
   * silence, and makes sure playback is running so the tail is heard.
   */
  complete() {
    if (this.processingBuffer.length > 0) {
      const paddedBuffer = this.addSilencePadding(this.processingBuffer);
      this.audioQueue.push(paddedBuffer);
      this.processingBuffer = new Float32Array(0);
    }
    const endingSilence = new Float32Array(Math.floor(this.sampleRate * 0.2));
    this.audioQueue.push(endingSilence);
    if (this.isPlaying) {
      if (this.isPaused) {
        this._resumeAfterUnderrun();
      } else {
        this.scheduleNextBuffer();
      }
    } else if (this.audioQueue.length > 0) {
      this.isPlaying = true;
      this.scheduledTime = this.context.currentTime + 0.05;
      this.scheduleNextBuffer();
    }
  }

  /**
   * Resumes a suspended AudioContext and fades the output in. When the context
   * is already running the gain is left alone, because this is called for every
   * incoming chunk and re-zeroing the gain would duck audio that is playing.
   */
  async resume() {
    if (this.context.state === "suspended") {
      await this.context.resume();
      this.gainNode.gain.setValueAtTime(0, this.context.currentTime);
      this.gainNode.gain.linearRampToValueAtTime(1, this.context.currentTime + 0.1);
    }
  }
}
/**
 * Decodes a base64 string into its raw bytes.
 * @param {string} base64 Standard base64 text.
 * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
 */
function base64ToArrayBuffer(base64) {
  // atob yields a "binary string": every character code is one byte (0-255).
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0)).buffer;
}
/**
 * Renders chat messages into the `responsesDiv` transcript element.
 *
 * User messages are added as plain text. Assistant text arrives in fragments;
 * fragments are accumulated into one bubble, rendered with a small Markdown
 * subset, and the bubble is closed when the text looks finished.
 */
class MessageFormatter {
  constructor() {
    this.currentMessage = '';
    this.currentMessageElement = null; // assistant bubble currently being filled, if any
    this.messageBuffer = ''; // raw text accumulated for the open assistant bubble
    this.messageComplete = false;
    this.lastMessageTime = Date.now();
    this.pauseThreshold = 300; // ms gap after which a newline is inserted between fragments
  }

  /**
   * Escapes HTML metacharacters so text can be placed inside innerHTML safely.
   * @param {string} text
   * @returns {string}
   */
  escapeHtml(text) {
    return text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  }

  /**
   * Applies inline Markdown (bold+italic, bold, italic, code) to one line of
   * already-escaped text.
   * @param {string} text
   * @returns {string}
   */
  formatInline(text) {
    return text
      .replace(/\*\*\*(.*?)\*\*\*/g, '<strong><em>$1</em></strong>')
      .replace(/\*\*(.*?)\*\*/g, '<strong>$1</strong>')
      .replace(/\*(.*?)\*/g, '<em>$1</em>')
      .replace(/`(.*?)`/g, '<code>$1</code>');
  }

  /**
   * Converts model text to HTML. The text is HTML-escaped first because the
   * result is assigned to innerHTML and model output must be treated as
   * untrusted. Supports `* ` bullets, `1.` numbered lines, emphasis, inline
   * code, and line breaks.
   * @param {string} text Raw (unescaped) message text.
   * @returns {string} HTML markup.
   */
  formatMarkdown(text) {
    const formattedLines = this.escapeHtml(text).split('\n').map(line => {
      const trimmed = line.trim();
      // Detect list markers before inline formatting so a leading "* " is not
      // mistaken for the opening of an *emphasis* span.
      if (trimmed.startsWith('* ')) {
        return `<li>${this.formatInline(trimmed.substring(2))}</li>`;
      }
      if (/^\d+\./.test(trimmed)) {
        return `<li>${this.formatInline(trimmed)}</li>`;
      }
      return this.formatInline(line);
    });
    return formattedLines.join('\n')
      .replace(/\n\n/g, '<br><br>')
      // A newline directly before a tag (e.g. <li>) is left as-is.
      .replace(/\n(?![<])/g, '<br>');
  }

  /**
   * Adds text to the transcript.
   * @param {string} text          Message text or assistant fragment.
   * @param {boolean} [isUser=false] True for a complete user message.
   */
  appendMessage(text, isUser = false) {
    const now = Date.now();
    if (isUser) {
      const messageDiv = document.createElement('div');
      messageDiv.className = 'message user-message';
      const contentDiv = document.createElement('div');
      contentDiv.className = 'markdown-content';
      contentDiv.textContent = text;
      messageDiv.appendChild(contentDiv);
      responsesDiv.appendChild(messageDiv);
      this.messageComplete = true;
      this.scrollToBottom();
      this.lastMessageTime = now;
      return;
    }
    // A noticeable pause between fragments of the same bubble becomes a line break.
    if (this.currentMessageElement && (now - this.lastMessageTime > this.pauseThreshold)) {
      this.messageBuffer += '\n';
    }
    this.messageBuffer += text;
    this.lastMessageTime = now;
    if (!this.currentMessageElement) {
      this.currentMessageElement = document.createElement('div');
      this.currentMessageElement.className = 'message assistant-message';
      const contentDiv = document.createElement('div');
      contentDiv.className = 'markdown-content';
      this.currentMessageElement.appendChild(contentDiv);
      responsesDiv.appendChild(this.currentMessageElement);
    }
    const contentDiv = this.currentMessageElement.querySelector('.markdown-content');
    contentDiv.innerHTML = this.formatMarkdown(this.messageBuffer);
    // Close the bubble when the text ends with a blank line, sentence
    // punctuation followed by whitespace, or a list item followed by a blank line.
    if (
      this.messageBuffer.match(/\n\n$/) ||
      this.messageBuffer.match(/[.!?]\s+$/) ||
      this.messageBuffer.match(/\n\s*[-*]\s.*\n\n$/)
    ) {
      this.finalizeMessage();
    }
    this.scrollToBottom();
  }

  /** Closes the current assistant bubble so the next fragment starts a new one. */
  finalizeMessage() {
    this.messageBuffer = '';
    this.currentMessageElement = null;
    this.messageComplete = true;
    this.lastMessageTime = Date.now();
  }

  /** Scrolls the transcript to its newest message. */
  scrollToBottom() {
    responsesDiv.scrollTop = responsesDiv.scrollHeight;
  }
}
// Source text of the AudioWorklet processor used for microphone capture.
// It is interpolated into a registerProcessor(...) call, so the string must
// remain a single class expression. The processor converts float samples to
// 16-bit PCM and posts every full 2048-sample block to the main thread as
// { data: { int16arrayBuffer } }.
const AudioProcessingWorklet = `
class AudioProcessor extends AudioWorkletProcessor {
  buffer = new Int16Array(2048);
  bufferWriteIndex = 0;
  process(inputs) {
    if (inputs[0].length) {
      const samples = inputs[0][0];
      for (let i = 0; i < samples.length; i++) {
        // Clamp to the int16 range: a full-scale +1.0 sample scales to 32768,
        // which would otherwise wrap around to -32768 when stored.
        const scaled = samples[i] * 32768;
        const int16Value = scaled > 32767 ? 32767 : (scaled < -32768 ? -32768 : scaled);
        this.buffer[this.bufferWriteIndex++] = int16Value;
        if (this.bufferWriteIndex >= this.buffer.length) {
          this.port.postMessage({
            data: { int16arrayBuffer: this.buffer.buffer }
          });
          this.bufferWriteIndex = 0;
        }
      }
    }
    return true;
  }
}`;
// Shared formatter; assistant text published by GoogleLivePublisher as
// `modelResponse` window events is appended to the transcript.
const messageFormatter = new MessageFormatter();
window.addEventListener('modelResponse', (event) => {
  console.log(event.detail.text);
  messageFormatter.appendMessage(event.detail.text);
});

// Page-level state and element handles used by the functions below.
let stream = null; // most recent MediaStream returned by getStream()
const videoSelect = document.getElementById('videoSource');
const audioSelect = document.getElementById('audioSource');
const preview = document.getElementById('preview');
const errorDisplay = document.getElementById('error');
const responsesDiv = document.getElementById('responses');
// Looked up explicitly instead of relying on the browser exposing elements
// with an id as implicit global variables.
const startButton = document.getElementById('startButton');
let publisher = null; // active GoogleLivePublisher, or null when not streaming
/**
 * Reads the API key field and enables the start button only when it holds a
 * non-blank value.
 * @returns {string} The trimmed key (empty string when nothing was entered).
 */
function validateApiKey() {
  const trimmedKey = document.getElementById('apiKey').value.trim();
  startButton.disabled = trimmedKey.length === 0;
  return trimmedKey;
}

// Restore the key saved by a previous session, sync the button state, and
// re-validate on every edit.
{
  const keyField = document.getElementById('apiKey');
  const savedKey = localStorage.getItem('apiKey');
  keyField.value = savedKey ? savedKey : '';
  validateApiKey();
  keyField.addEventListener('input', validateApiKey);
}
// Start/stop toggle. With no active publisher it captures media, stores the
// API key and starts publishing; with an active publisher it shuts everything
// down, including the camera and microphone tracks.
startButton.addEventListener('click', async () => {
  const apiKeyInput = document.getElementById('apiKey');
  const apiKey = apiKeyInput.value.trim();
  if (!apiKey) {
    // Flash the key field for two seconds to show what is missing.
    apiKeyInput.classList.add('highlight');
    setTimeout(() => apiKeyInput.classList.remove('highlight'), 2000);
    return;
  }

  // Declared outside the try block so the catch handler can release it.
  let mediaStream = null;
  try {
    if (publisher) {
      startButton.textContent = 'Starting...';
      startButton.disabled = true;
      const activeStream = publisher.stream;
      publisher.stop();
      publisher = null;
      // Release the devices; otherwise the camera/mic stay live after stopping.
      activeStream?.getTracks().forEach(track => track.stop());
      preview.srcObject = null;
      startButton.textContent = 'Start Stream';
      startButton.disabled = false;
      return;
    }
    startButton.textContent = 'Starting...';
    startButton.disabled = true;
    mediaStream = await getStream();
    preview.srcObject = mediaStream;
    localStorage.setItem('apiKey', apiKey);
    publisher = new GoogleLivePublisher(mediaStream, apiKey);
    await publisher.start();
    showError(''); // clear any message left over from an earlier failure
    startButton.textContent = 'Stop Stream';
    startButton.disabled = false;
  } catch (err) {
    console.error(err);
    // Drop the failed publisher so the next click starts fresh instead of
    // taking the "stop" branch, and release any captured devices.
    publisher = null;
    mediaStream?.getTracks().forEach(track => track.stop());
    preview.srcObject = null;
    showError('Failed to start publishing: ' + err.message);
    startButton.textContent = 'Start Stream';
    startButton.disabled = false;
  }
});
/**
 * Rebuilds the camera and microphone drop-downs from the available devices.
 *
 * Also registered as the `devicechange` listener, so it can run many times:
 * each run replaces the existing options rather than appending to them, and
 * keeps the current selection when that device is still present.
 */
async function getDevices() {
  try {
    // Briefly open (and immediately release) a stream before enumerating;
    // a refusal is only logged and enumeration still proceeds.
    await navigator.mediaDevices.getUserMedia({
        audio: true,
        video: true
      })
      .then(stream => stream.getTracks().forEach(track => track.stop()))
      .catch(e => console.warn('Permission denied:', e));
    const devices = await navigator.mediaDevices.enumerateDevices();

    const populate = (select, kind, fallbackLabel) => {
      const previousValue = select.value;
      select.options.length = 0; // drop options from earlier runs
      devices
        .filter(device => device.kind === kind)
        .forEach(device => {
          const option = document.createElement('option');
          option.value = device.deviceId;
          option.text = device.label || `${fallbackLabel} ${select.length + 1}`;
          select.appendChild(option);
        });
      const stillPresent = Array.from(select.options).some(option => option.value === previousValue);
      if (previousValue && stillPresent) {
        select.value = previousValue;
      }
    };

    populate(videoSelect, 'videoinput', 'Camera');
    populate(audioSelect, 'audioinput', 'Microphone');
  } catch (err) {
    showError('Failed to get devices: ' + err.message);
  }
}
/**
 * Captures audio and video from the devices chosen in the two drop-downs.
 * Any stream captured earlier is stopped first; the new stream is stored in
 * the page-level `stream` variable and shown in the preview element.
 * @returns {Promise<MediaStream>}
 * @throws Rethrows the getUserMedia error after displaying it.
 */
async function getStream() {
  // Release the previous capture before opening a new one.
  stream?.getTracks().forEach(track => track.stop());

  // An empty selection leaves deviceId undefined.
  const deviceConstraint = (select) => ({
    deviceId: select.value ? { exact: select.value } : undefined
  });
  const constraints = {
    video: deviceConstraint(videoSelect),
    audio: deviceConstraint(audioSelect)
  };

  try {
    stream = await navigator.mediaDevices.getUserMedia(constraints);
  } catch (err) {
    showError('Failed to get stream: ' + err.message);
    throw err;
  }
  preview.srcObject = stream;
  return stream;
}
/**
 * Shows a message in the page's error line, replacing whatever was there.
 * @param {string} message Text to display.
 */
function showError(message) {
  const banner = errorDisplay;
  banner.textContent = message;
}
// Startup: request camera/microphone access once, release the probe stream,
// then fill the device lists and keep them updated as devices change.
if (navigator.mediaDevices?.getUserMedia) {
  const probeConstraints = { video: true, audio: true };
  navigator.mediaDevices.getUserMedia(probeConstraints)
    .then(initialStream => {
      for (const track of initialStream.getTracks()) {
        track.stop();
      }
      getDevices();
    })
    .catch(err => showError('Initial permission request failed: ' + err.message));
  navigator.mediaDevices.addEventListener('devicechange', getDevices);
} else {
  showError('getUserMedia not supported');
}
// Message composer wiring.
const messageInput = document.querySelector('.message-input');
const sendButton = document.querySelector('#sendButton');

// Move the text box and the send button so they sit directly before the
// transcript inside its parent (input first, then button).
for (const control of [messageInput, sendButton]) {
  responsesDiv.parentElement.insertBefore(control, responsesDiv);
}

// Send: echo the typed text into the transcript, forward it to the model,
// then clear the box. Requires an active publisher.
sendButton.addEventListener('click', async () => {
  if (!publisher) {
    showError('Please start the stream first');
    return;
  }
  const hasText = Boolean(messageInput.value.trim());
  if (!hasText) {
    return;
  }
  try {
    messageFormatter.appendMessage(messageInput.value, true);
    await publisher.sendPrompt(messageInput.value);
    messageInput.value = '';
  } catch (err) {
    console.error('Failed to send message:', err);
    showError('Failed to send message: ' + err.message);
  }
});
/**
 * Restarts an active session so a changed setting takes effect (the voice and
 * response type are only read when the publisher connects). Does nothing
 * unless a publisher exists and the button shows 'Stop Stream'.
 * @param {string} settingName Inserted into the failure message
 *   ("Failed to restart with new <settingName>: ...").
 * @returns {Promise<void>} Never rejects; failures are shown in the error line.
 */
async function restartPublisher(settingName) {
  if (!publisher || startButton.textContent !== 'Stop Stream') {
    return;
  }
  startButton.textContent = 'Starting...';
  startButton.disabled = true;
  publisher.stop();
  publisher = null;

  let mediaStream = null;
  try {
    mediaStream = await getStream();
    preview.srcObject = mediaStream;
    // Trimmed for consistency with the start button handler.
    const apiKey = document.getElementById('apiKey').value.trim();
    publisher = new GoogleLivePublisher(mediaStream, apiKey);
    await publisher.start();
    startButton.textContent = 'Stop Stream';
  } catch (err) {
    console.error(err);
    // Leave no half-started session behind: drop the publisher and release
    // the devices so the next click on the start button begins cleanly.
    publisher = null;
    mediaStream?.getTracks().forEach(track => track.stop());
    preview.srcObject = null;
    showError(`Failed to restart with new ${settingName}: ` + err.message);
    startButton.textContent = 'Start Stream';
  } finally {
    startButton.disabled = false;
  }
}

// Changing the voice restarts a running session with the new voice.
document.getElementById('voiceSelect').addEventListener('change', () => {
  restartPublisher('voice');
});

// Changing the response type shows or hides the voice picker and restarts a
// running session.
document.getElementById('responseType').addEventListener('change', function() {
  const voiceSelect = document.getElementById('voiceSelect');
  voiceSelect.style.display = this.value === 'audio' ? 'block' : 'none';
  restartPublisher('response type');
});
// Pressing Enter in the message box sends the message. Uses `keydown`
// (`keypress` is deprecated) and ignores Enter while an IME composition is in
// progress, where it confirms the composed text instead.
messageInput.addEventListener('keydown', (e) => {
  if (e.key === 'Enter' && !e.isComposing) {
    sendButton.click();
  }
});
</script>
</body>
</html>