Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions contributed/conversationalAI/assets.js
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,8 @@ const assets = {
providers: [
{ name:"Google", id:"GOOGLE",
models: [
{ name:"Gemini 2.5 Flash Live", id:"gemini-2.5-flash-native-audio-preview-12-2025" }
{ name:"Gemini 2.5 Flash Live", id:"gemini-2.5-flash-native-audio-preview-12-2025" },
{ name:"Gemini 3.1 Flash Live", id:"gemini-3.1-flash-live-preview" }
],
}
],
Expand Down Expand Up @@ -435,7 +436,8 @@ const assets = {
{ name:"OpenAI", id:"OPEN_AI",
models: [
{ name:"gpt-realtime", id:"gpt-realtime" },
{ name:"gpt-realtime-mini", id:"gpt-realtime-mini" }
{ name:"gpt-realtime-mini", id:"gpt-realtime-mini" },
{ name:"gpt-realtime-2", id:"gpt-realtime-2" },
],
}
],
Expand Down
2 changes: 1 addition & 1 deletion modules/network/services/chatAudioIO/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ The conversation library implements support for various AI cloud services using
- [Google Gemini Live](https://ai.google.dev/api/multimodal-live)
- [Hume Empathic Voice Interface](https://dev.hume.ai/docs/empathic-voice-interface-evi/overview)
- [Eleven Labs Conversational AI](https://elevenlabs.io/docs/conversational-ai/overview)
- [Deepgram Voice Agent](https://elevenlabs.io/docs/conversational-ai/overview)
- [Deepgram Voice Agent](https://deepgram.com/product/voice-agent-api)

## Programming Interface

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@
import config from "mc/config"
import ChatWebSocketWorker from "ChatWebSocketWorker";

const audioPrefix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('{"realtimeInput":{"mediaChunks":[{"mimeType":"audio/pcm;rate=24000","data":"')), true);
const audioSuffix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('"}]}}')), true);
const audioPrefixOld = Object.freeze(new Uint8Array(ArrayBuffer.fromString('{"realtimeInput":{"mediaChunks":[{"mimeType":"audio/pcm;rate=24000","data":"')), true);
const audioSuffixOld = Object.freeze(new Uint8Array(ArrayBuffer.fromString('"}]}}')), true);
const audioPrefix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('{"realtimeInput":{"audio":{"mimeType":"audio/pcm;rate=24000","data":"')), true);
const audioSuffix = Object.freeze(new Uint8Array(ArrayBuffer.fromString('"}}}')), true);

export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
constructor(options) {
super(options);
this.host = "generativelanguage.googleapis.com";
this.headers = null;
this.audioPrefix = audioPrefix;
this.audioSuffix = audioSuffix;
this.speaking = true;
}
configure(message) {
Expand All @@ -39,6 +39,15 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
const voiceName = message.voiceID ?? "aoede";
const model = message.modelID ?? "gemini-2.5-flash-native-audio-preview-12-2025";
const apiKey = message.apiKey ?? config.geminiAPIKey;

if(model.includes("gemini-2")) {
this.audioPrefix = audioPrefixOld;
this.audioSuffix = audioSuffixOld;
} else {
this.audioPrefix = audioPrefix;
this.audioSuffix = audioSuffix;
}

this.path = `/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent?key=${apiKey}`;
this.setup = {
model: `models/${model}`,
Expand All @@ -63,7 +72,7 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
};
}
isBase64(result, current, name) {
return (current?.mimeType == "audio/pcm;rate=24000") && (name == "data");
return (current?.mimeType === "audio/pcm;rate=24000") && (name === "data");
}
onJSON(json) {
for (let key in json) {
Expand Down Expand Up @@ -113,7 +122,7 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
'serverContent'(data) {
const parts = data.modelTurn?.parts;
if (parts) {
const part = parts.find(part => part.inlineData?.mimeType == "audio/pcm;rate=24000");
const part = parts.find(part => part.inlineData?.mimeType === "audio/pcm;rate=24000");
if (part) {
if (this.speaking) {
this.postMessage({ id:"receiveInputText", text:"" });
Expand Down Expand Up @@ -145,7 +154,7 @@ export default class GoogleGeminiLiveModel extends ChatWebSocketWorker {
const functionCalls = data.functionCalls;
if (functionCalls) {
this.post("listen");
for (let functionCall of functionCalls) {
for (const functionCall of functionCalls) {
this.postMessage({
id:"receiveFunctionCall",
call:functionCall.id,
Expand Down