Skip to content

Commit 8bc190a

Browse files
authored
Merge pull request #5 from google-gemini/thor/gemini-3-eap
feat: Gemini 3.1 Flash Live
2 parents 80fbc26 + 251aa78 commit 8bc190a

File tree

13 files changed

+171
-129
lines changed

13 files changed

+171
-129
lines changed

command-line/node/main.mts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const ai = new GoogleGenAI({});
88
// More information at: https://ai.google.dev/gemini-api/docs/ephemeral-tokens
99

1010
// --- Live API config ---
11-
const model = 'gemini-2.5-flash-native-audio-preview-12-2025';
11+
const model = 'gemini-3.1-flash-live-preview';
1212
const config = {
1313
responseModalities: [Modality.AUDIO],
1414
systemInstruction: "You are a helpful and friendly AI assistant.",

command-line/python/main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
pya = pyaudio.PyAudio()
1515

1616
# --- Live API config ---
17-
MODEL = "gemini-2.5-flash-native-audio-preview-12-2025"
17+
MODEL = "gemini-3.1-flash-live-preview"
1818
CONFIG = {
1919
"response_modalities": ["AUDIO"],
2020
"system_instruction": "You are a helpful and friendly AI assistant.",

gemini-live-ephemeral-tokens-websocket/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class MyTool extends FunctionCallDefinition {
9292

9393
## Configuration Options
9494

95-
- **Model**: `gemini-2.5-flash-native-audio-preview-12-2025` (default)
95+
- **Model**: `gemini-3.1-flash-live-preview` (default)
9696
- **Voice**: Puck, Charon, Kore, Fenrir, Aoede
9797
- **Response**: Audio, text, or both
9898
- **Tools**: Custom functions or Google Search grounding

gemini-live-ephemeral-tokens-websocket/frontend/audio-processors/capture.worklet.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
class AudioCaptureProcessor extends AudioWorkletProcessor {
66
constructor() {
77
super();
8-
this.bufferSize = 4096;
8+
this.bufferSize = 512; // 32ms at 16kHz — per Gemini best practices (20-40ms chunks)
99
this.buffer = new Float32Array(this.bufferSize);
1010
this.bufferIndex = 0;
1111
}

gemini-live-ephemeral-tokens-websocket/frontend/audio-processors/playback.worklet.js

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,20 @@
11
/**
2-
* Audio Playback Worklet Processor for playing PCM audio
2+
* Audio Playback Worklet Processor for playing PCM audio.
3+
* Uses an offset tracker instead of slice() to avoid allocations
4+
* on the real-time audio thread.
35
*/
46

57
class PCMProcessor extends AudioWorkletProcessor {
68
constructor() {
79
super();
810
this.audioQueue = [];
11+
this.currentOffset = 0; // Track position in current buffer (avoids slice())
912

1013
this.port.onmessage = (event) => {
1114
if (event.data === "interrupt") {
1215
// Clear the queue on interrupt
1316
this.audioQueue = [];
17+
this.currentOffset = 0;
1418
} else if (event.data instanceof Float32Array) {
1519
// Add audio data to the queue
1620
this.audioQueue.push(event.data);
@@ -31,23 +35,23 @@ class PCMProcessor extends AudioWorkletProcessor {
3135

3236
if (!currentBuffer || currentBuffer.length === 0) {
3337
this.audioQueue.shift();
38+
this.currentOffset = 0;
3439
continue;
3540
}
3641

3742
const remainingOutput = channel.length - outputIndex;
38-
const remainingBuffer = currentBuffer.length;
43+
const remainingBuffer = currentBuffer.length - this.currentOffset;
3944
const copyLength = Math.min(remainingOutput, remainingBuffer);
4045

41-
// Copy audio data to output
46+
// Copy audio data to output using offset (no slice allocation)
4247
for (let i = 0; i < copyLength; i++) {
43-
channel[outputIndex++] = currentBuffer[i];
48+
channel[outputIndex++] = currentBuffer[this.currentOffset++];
4449
}
4550

46-
// Update or remove the current buffer
47-
if (copyLength < remainingBuffer) {
48-
this.audioQueue[0] = currentBuffer.slice(copyLength);
49-
} else {
51+
// If we've consumed the entire buffer, move to the next one
52+
if (this.currentOffset >= currentBuffer.length) {
5053
this.audioQueue.shift();
54+
this.currentOffset = 0;
5155
}
5256
}
5357

gemini-live-ephemeral-tokens-websocket/frontend/geminilive.js

Lines changed: 82 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -19,59 +19,82 @@ const MultimodalLiveResponseType = {
1919
/**
2020
* Parses response messages from the Gemini Live API
2121
*/
22-
class MultimodalLiveResponseMessage {
23-
constructor(data) {
24-
this.data = "";
25-
this.type = "";
26-
this.endOfTurn = false;
22+
/**
23+
* Parses ALL response types from a single server message.
24+
* The server can now bundle multiple fields (e.g. audio + transcription)
25+
* in the same message. Returns an array of response objects.
26+
*/
27+
function parseResponseMessages(data) {
28+
const responses = [];
29+
const serverContent = data?.serverContent;
30+
const parts = serverContent?.modelTurn?.parts;
31+
32+
try {
33+
// Setup complete (exclusive — no other fields expected)
34+
if (data?.setupComplete) {
35+
console.log("🏁 SETUP COMPLETE response", data);
36+
responses.push({ type: MultimodalLiveResponseType.SETUP_COMPLETE, data: "", endOfTurn: false });
37+
return responses;
38+
}
2739

28-
console.log("raw message data: ", data);
40+
// Tool call (exclusive)
41+
if (data?.toolCall) {
42+
console.log("🎯 🛠️ TOOL CALL response", data?.toolCall);
43+
responses.push({ type: MultimodalLiveResponseType.TOOL_CALL, data: data.toolCall, endOfTurn: false });
44+
return responses;
45+
}
2946

30-
const serverContent = data?.serverContent;
31-
this.endOfTurn = serverContent?.turnComplete;
32-
const parts = serverContent?.modelTurn?.parts;
47+
// Audio data from model turn parts
48+
if (parts?.length) {
49+
for (const part of parts) {
50+
if (part.inlineData) {
51+
responses.push({ type: MultimodalLiveResponseType.AUDIO, data: part.inlineData.data, endOfTurn: false });
52+
} else if (part.text) {
53+
console.log("💬 TEXT response", part.text);
54+
responses.push({ type: MultimodalLiveResponseType.TEXT, data: part.text, endOfTurn: false });
55+
}
56+
}
57+
}
3358

34-
try {
35-
if (data?.setupComplete) {
36-
console.log("🏁 SETUP COMPLETE response", data);
37-
this.type = MultimodalLiveResponseType.SETUP_COMPLETE;
38-
} else if (serverContent?.turnComplete) {
39-
console.log("🏁 TURN COMPLETE response");
40-
this.type = MultimodalLiveResponseType.TURN_COMPLETE;
41-
} else if (serverContent?.interrupted) {
42-
console.log("🗣️ INTERRUPTED response");
43-
this.type = MultimodalLiveResponseType.INTERRUPTED;
44-
} else if (serverContent?.inputTranscription) {
45-
console.log("📝 INPUT TRANSCRIPTION:", serverContent.inputTranscription);
46-
this.type = MultimodalLiveResponseType.INPUT_TRANSCRIPTION;
47-
this.data = {
59+
// Transcriptions — checked independently, NOT in else-if with audio
60+
if (serverContent?.inputTranscription) {
61+
responses.push({
62+
type: MultimodalLiveResponseType.INPUT_TRANSCRIPTION,
63+
data: {
4864
text: serverContent.inputTranscription.text || "",
4965
finished: serverContent.inputTranscription.finished || false,
50-
};
51-
} else if (serverContent?.outputTranscription) {
52-
console.log("📝 OUTPUT TRANSCRIPTION:", serverContent.outputTranscription);
53-
this.type = MultimodalLiveResponseType.OUTPUT_TRANSCRIPTION;
54-
this.data = {
66+
},
67+
endOfTurn: false,
68+
});
69+
}
70+
71+
if (serverContent?.outputTranscription) {
72+
responses.push({
73+
type: MultimodalLiveResponseType.OUTPUT_TRANSCRIPTION,
74+
data: {
5575
text: serverContent.outputTranscription.text || "",
5676
finished: serverContent.outputTranscription.finished || false,
57-
};
58-
} else if (data?.toolCall) {
59-
console.log("🎯 🛠️ TOOL CALL response", data?.toolCall);
60-
this.type = MultimodalLiveResponseType.TOOL_CALL;
61-
this.data = data?.toolCall;
62-
} else if (parts?.length && parts[0].text) {
63-
console.log("💬 TEXT response", parts[0].text);
64-
this.data = parts[0].text;
65-
this.type = MultimodalLiveResponseType.TEXT;
66-
} else if (parts?.length && parts[0].inlineData) {
67-
console.log("🔊 AUDIO response");
68-
this.data = parts[0].inlineData.data;
69-
this.type = MultimodalLiveResponseType.AUDIO;
70-
}
71-
} catch (err) {
72-
console.log("⚠️ Error parsing response data: ", err, data);
77+
},
78+
endOfTurn: false,
79+
});
7380
}
81+
82+
// Interrupted
83+
if (serverContent?.interrupted) {
84+
console.log("🗣️ INTERRUPTED response");
85+
responses.push({ type: MultimodalLiveResponseType.INTERRUPTED, data: "", endOfTurn: false });
86+
}
87+
88+
// Turn complete
89+
if (serverContent?.turnComplete) {
90+
console.log("🏁 TURN COMPLETE response");
91+
responses.push({ type: MultimodalLiveResponseType.TURN_COMPLETE, data: "", endOfTurn: true });
92+
}
93+
} catch (err) {
94+
console.log("⚠️ Error parsing response data: ", err, data);
7495
}
96+
97+
return responses;
7598
}
7699

77100
/**
@@ -105,7 +128,7 @@ class FunctionCallDefinition {
105128
parameters
106129
)}`
107130
);
108-
this.functionToCall(parameters);
131+
return this.functionToCall(parameters);
109132
}
110133
}
111134

@@ -121,10 +144,8 @@ class GeminiLiveAPI {
121144
this.responseModalities = ["AUDIO"];
122145
this.systemInstructions = "";
123146
this.googleGrounding = false;
124-
this.enableAffectiveDialog = false; // Default affective dialog
125147
this.voiceName = "Puck"; // Default voice
126148
this.temperature = 1.0; // Default temperature
127-
this.proactivity = { proactiveAudio: false }; // Proactivity config
128149
this.inputAudioTranscription = false;
129150
this.outputAudioTranscription = false;
130151
this.enableFunctionCalls = false;
@@ -195,10 +216,7 @@ class GeminiLiveAPI {
195216
this.voiceName = voiceName;
196217
}
197218

198-
setProactivity(proactivity) {
199-
console.log("setting proactivity: ", proactivity);
200-
this.proactivity = proactivity;
201-
}
219+
202220

203221
setInputAudioTranscription(enabled) {
204222
console.log("setting input audio transcription: ", enabled);
@@ -223,7 +241,7 @@ class GeminiLiveAPI {
223241

224242
callFunction(functionName, parameters) {
225243
const functionToCall = this.functionsMap[functionName];
226-
functionToCall.runFunction(parameters);
244+
return functionToCall.runFunction(parameters);
227245
}
228246

229247
connect() {
@@ -238,15 +256,12 @@ class GeminiLiveAPI {
238256
}
239257

240258
sendMessage(message) {
241-
console.log("🟩 Sending message: ", message);
242259
if (this.webSocket && this.webSocket.readyState === WebSocket.OPEN) {
243260
this.webSocket.send(JSON.stringify(message));
244261
}
245262
}
246263

247264
async onReceiveMessage(messageEvent) {
248-
console.log("Message received: ", messageEvent);
249-
250265
let jsonData;
251266
if (messageEvent.data instanceof Blob) {
252267
jsonData = await messageEvent.data.text();
@@ -258,8 +273,11 @@ class GeminiLiveAPI {
258273

259274
try {
260275
const messageData = JSON.parse(jsonData);
261-
const message = new MultimodalLiveResponseMessage(messageData);
262-
this.onReceiveResponse(message);
276+
// Parse all response types from this message (audio + transcription can coexist)
277+
const responses = parseResponseMessages(messageData);
278+
for (const response of responses) {
279+
this.onReceiveResponse(response);
280+
}
263281
} catch (err) {
264282
console.error("Error parsing JSON message:", err, jsonData);
265283
}
@@ -322,8 +340,8 @@ class GeminiLiveAPI {
322340
},
323341
},
324342
systemInstruction: { parts: [{ text: this.systemInstructions }] },
325-
tools: { functionDeclarations: tools },
326-
proactivity: this.proactivity,
343+
tools: [{ functionDeclarations: tools }],
344+
327345

328346
realtimeInputConfig: {
329347
automaticActivityDetection: {
@@ -334,6 +352,7 @@ class GeminiLiveAPI {
334352
startOfSpeechSensitivity: this.automaticActivityDetection.start_of_speech_sensitivity,
335353
},
336354
activityHandling: this.activityHandling,
355+
turnCoverage: "TURN_INCLUDES_ONLY_ACTIVITY",
337356
},
338357
},
339358
};
@@ -347,18 +366,14 @@ class GeminiLiveAPI {
347366
}
348367

349368
if (this.googleGrounding) {
350-
sessionSetupMessage.setup.tools.googleSearch = {};
351369
// Currently can't have both Google Search with custom tools.
352370
console.log(
353371
"Google Grounding enabled, removing custom function calls if any."
354372
);
355-
delete sessionSetupMessage.setup.tools.functionDeclarations;
373+
sessionSetupMessage.setup.tools = [{ googleSearch: {} }];
356374
}
357375

358-
// Add affective dialog if enabled
359-
if (this.enableAffectiveDialog) {
360-
sessionSetupMessage.setup.generationConfig.enableAffectiveDialog = true;
361-
}
376+
362377

363378
// Store the setup message for later access
364379
this.lastSetupMessage = sessionSetupMessage;
@@ -376,11 +391,10 @@ class GeminiLiveAPI {
376391
this.sendMessage(message);
377392
}
378393

379-
sendToolResponse(toolCallId, response) {
394+
sendToolResponse(functionResponses) {
380395
const message = {
381396
toolResponse: {
382-
id: toolCallId,
383-
response: response,
397+
functionResponses: functionResponses,
384398
},
385399
};
386400
console.log("🔧 Sending tool response:", message);

gemini-live-ephemeral-tokens-websocket/frontend/index.html

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,7 @@ <h2>API Configuration</h2>
8585

8686
<div>
8787
<label for="model">Model ID:</label><br />
88-
<input type="text" id="model" value="gemini-2.5-flash-native-audio-preview-12-2025"
89-
placeholder="Enter model ID" />
88+
<input type="text" id="model" value="gemini-3.1-flash-live-preview" placeholder="Enter model ID" />
9089
</div>
9190
</details>
9291

@@ -119,24 +118,12 @@ <h2>API Configuration</h2>
119118
Lower = more predictable/focused</small>
120119
</div>
121120

122-
<div>
123-
<input type="checkbox" id="enableProactiveAudio" checked />
124-
<label for="enableProactiveAudio">Enable proactive audio (Gemini will ignore speech based on
125-
instructions)</label>
126-
</div>
127-
128121
<div>
129122
<input type="checkbox" id="enableGrounding" />
130123
<label for="enableGrounding">Enable Google grounding (Enabling Google grounding will disable
131124
custom tools)
132125
</label>
133126
</div>
134-
135-
<div>
136-
<input type="checkbox" id="enableAffectiveDialog" checked />
137-
<label for="enableAffectiveDialog">Enable affective dialog (emotion detection and empathetic
138-
responses)</label>
139-
</div>
140127
</details>
141128

142129
<!-- Custom Tools -->

gemini-live-ephemeral-tokens-websocket/frontend/mediaUtils.js

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -462,10 +462,11 @@ class AudioPlayer {
462462
await this.audioContext.resume();
463463
}
464464

465-
// Convert base64 to Float32Array
465+
// Efficient base64 → binary decode
466466
const binaryString = atob(base64Audio);
467-
const bytes = new Uint8Array(binaryString.length);
468-
for (let i = 0; i < binaryString.length; i++) {
467+
const len = binaryString.length;
468+
const bytes = new Uint8Array(len);
469+
for (let i = 0; i < len; i++) {
469470
bytes[i] = binaryString.charCodeAt(i);
470471
}
471472

0 commit comments

Comments (0)