From c50c69dc1112ac1a07b603c4c56a3eec3f840ba6 Mon Sep 17 00:00:00 2001
From: STC <stc1988@users.noreply.github.com>
Date: Wed, 20 May 2026 23:02:54 +0900
Subject: [PATCH] Support gemini-3.1-flash-live and gpt-realtime-2

---
 contributed/conversationalAI/assets.js        |  6 +++--
 .../network/services/chatAudioIO/readme.md    |  2 +-
 .../workers/googleGeminiLiveModel.js          | 23 +++++++++++++------
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/contributed/conversationalAI/assets.js b/contributed/conversationalAI/assets.js
index 0ab6fa796..9544714fe 100644
--- a/contributed/conversationalAI/assets.js
+++ b/contributed/conversationalAI/assets.js
@@ -262,7 +262,8 @@ const assets = {
 			providers: [
 				{	name:"Google", id:"GOOGLE",
 					models: [
-						{ name:"Gemini 2.5 Flash Live", id:"gemini-2.5-flash-native-audio-preview-12-2025" }
+						{ name:"Gemini 2.5 Flash Live", id:"gemini-2.5-flash-native-audio-preview-12-2025" },
+						{ name:"Gemini 3.1 Flash Live", id:"gemini-3.1-flash-live-preview" }
 					],
 				}
 			],
@@ -435,7 +436,8 @@ const assets = {
 				{	name:"OpenAI", id:"OPEN_AI",
 					models: [
 						{ name:"gpt-realtime", id:"gpt-realtime" },
-						{ name:"gpt-realtime-mini", id:"gpt-realtime-mini" }
+						{ name:"gpt-realtime-mini", id:"gpt-realtime-mini" },
+						{ name:"gpt-realtime-2", id:"gpt-realtime-2" },
 					],
 				}
 			],
diff --git a/modules/network/services/chatAudioIO/readme.md b/modules/network/services/chatAudioIO/readme.md
index 66c524220..16e43c2c8 100644
--- a/modules/network/services/chatAudioIO/readme.md
+++ b/modules/network/services/chatAudioIO/readme.md
@@ -16,7 +16,7 @@ The conversation library implements support for various AI cloud services using
 - [Google Gemini Live](https://ai.google.dev/api/multimodal-live)
 - [Hume Empathic Voice Interface](https://dev.hume.ai/docs/empathic-voice-interface-evi/overview)
 - [Eleven Labs Conversational AI](https://elevenlabs.io/docs/conversational-ai/overview)
-- [Deepgram Voice Agent](https://elevenlabs.io/docs/conversational-ai/overview)
+- [Deepgram Voice Agent](https://deepgram.com/product/voice-agent-api)
 
 ## Programming Interface
 
diff --git a/modules/network/services/chatAudioIO/workers/googleGeminiLiveModel.js b/modules/network/services/chatAudioIO/workers/googleGeminiLiveModel.js
index c0e4291d6..ea918502d 100644
--- a/modules/network/services/chatAudioIO/workers/googleGeminiLiveModel.js
+++ b/modules/network/services/chatAudioIO/workers/googleGeminiLiveModel.js
@@ -21,16 +21,16 @@
 import config from "mc/config"
 import ChatWebSocketWorker from "ChatWebSocketWorker";
 
-const audioPrefix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('{"realtimeInput":{"mediaChunks":[{"mimeType":"audio/pcm;rate=24000","data":"')), true);
-const audioSuffix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('"}]}}')), true);
+const audioPrefixOld = Object.freeze(new Uint8Array(ArrayBuffer.fromString('{"realtimeInput":{"mediaChunks":[{"mimeType":"audio/pcm;rate=24000","data":"')), true);
+const audioSuffixOld = Object.freeze(new Uint8Array(ArrayBuffer.fromString('"}]}}')), true);
+const audioPrefix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('{"realtimeInput":{"audio":{"mimeType":"audio/pcm;rate=24000","data":"')), true);
+const audioSuffix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('"}}}')), true);
 
 export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
 	constructor(options) {
 		super(options);
 		this.host = "generativelanguage.googleapis.com";
 		this.headers = null;
-		this.audioPrefix = audioPrefix;
-		this.audioSuffix = audioSuffix;
 		this.speaking = true;
 	}
 	configure(message) {
@@ -39,6 +39,15 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
 		const voiceName = message.voiceID ?? "aoede";
 		const model = message.modelID ?? "gemini-2.5-flash-native-audio-preview-12-2025";
 		const apiKey = message.apiKey ?? config.geminiAPIKey;
+
+		if(model.includes("gemini-2")) {
+			this.audioPrefix = audioPrefixOld;
+			this.audioSuffix = audioSuffixOld;
+		} else {
+			this.audioPrefix = audioPrefix;
+			this.audioSuffix = audioSuffix;
+		}
+
 		this.path = `/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
 		this.setup = {
 			model: `models/${model}`,
@@ -63,7 +72,7 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
 		};
 	}
 	isBase64(result, current, name) {
-		return (current?.mimeType == "audio/pcm;rate=24000") && (name == "data");
+		return (current?.mimeType === "audio/pcm;rate=24000") && (name === "data");
 	}
 	onJSON(json) {
 		for (let key in json) {
@@ -113,7 +122,7 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
 	'serverContent'(data) {
 		const parts = data.modelTurn?.parts;
 		if (parts) {
-			const part = parts.find(part => part.inlineData?.mimeType == "audio/pcm;rate=24000");
+			const part = parts.find(part => part.inlineData?.mimeType === "audio/pcm;rate=24000");
 			if (part) {
 				if (this.speaking) {
 					this.postMessage({ id:"receiveInputText", text:"" });
@@ -145,7 +154,7 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
 		const functionCalls = data.functionCalls;
 		if (functionCalls) {
 			this.post("listen");
-			for (let functionCall of functionCalls) {
+			for (const functionCall of functionCalls) {
 				this.postMessage({ 
 					id:"receiveFunctionCall", 
 					call:functionCall.id,