Skip to content

Commit 251aa78

Browse files
committed
feat: Update the default model to gemini-3.1-flash-live-preview, remove the proactive audio and affective dialog features, and move turn_coverage into realtimeInputConfig.
1 parent 7ee9cf4 commit 251aa78

5 files changed

Lines changed: 21 additions & 40 deletions

File tree

command-line/node/main.mts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const ai = new GoogleGenAI({});
88
// More information at: https://ai.google.dev/gemini-api/docs/ephemeral-tokens
99

1010
// --- Live API config ---
11-
const model = 'gemini-2.5-flash-native-audio-preview-12-2025';
11+
const model = 'gemini-3.1-flash-live-preview';
1212
const config = {
1313
responseModalities: [Modality.AUDIO],
1414
systemInstruction: "You are a helpful and friendly AI assistant.",

gemini-live-ephemeral-tokens-websocket/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ class MyTool extends FunctionCallDefinition {
9292

9393
## Configuration Options
9494

95-
- **Model**: `gemini-3.1-flash-audio-eap` (default)
95+
- **Model**: `gemini-3.1-flash-live-preview` (default)
9696
- **Voice**: Puck, Charon, Kore, Fenrir, Aoede
9797
- **Response**: Audio, text, or both
9898
- **Tools**: Custom functions or Google Search grounding

gemini-live-ephemeral-tokens-websocket/frontend/geminilive.js

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -144,10 +144,8 @@ class GeminiLiveAPI {
144144
this.responseModalities = ["AUDIO"];
145145
this.systemInstructions = "";
146146
this.googleGrounding = false;
147-
this.enableAffectiveDialog = false; // Default affective dialog
148147
this.voiceName = "Puck"; // Default voice
149148
this.temperature = 1.0; // Default temperature
150-
this.proactivity = { proactiveAudio: false }; // Proactivity config
151149
this.inputAudioTranscription = false;
152150
this.outputAudioTranscription = false;
153151
this.enableFunctionCalls = false;
@@ -218,10 +216,7 @@ class GeminiLiveAPI {
218216
this.voiceName = voiceName;
219217
}
220218

221-
setProactivity(proactivity) {
222-
console.log("setting proactivity: ", proactivity);
223-
this.proactivity = proactivity;
224-
}
219+
225220

226221
setInputAudioTranscription(enabled) {
227222
console.log("setting input audio transcription: ", enabled);
@@ -346,19 +341,19 @@ class GeminiLiveAPI {
346341
},
347342
systemInstruction: { parts: [{ text: this.systemInstructions }] },
348343
tools: [{ functionDeclarations: tools }],
349-
turnCoverage: "TURN_INCLUDES_ONLY_ACTIVITY",
350-
// proactivity: this.proactivity,
351-
352-
// realtimeInputConfig: {
353-
// automaticActivityDetection: {
354-
// disabled: this.automaticActivityDetection.disabled,
355-
// silenceDurationMs: this.automaticActivityDetection.silence_duration_ms,
356-
// prefixPaddingMs: this.automaticActivityDetection.prefix_padding_ms,
357-
// endOfSpeechSensitivity: this.automaticActivityDetection.end_of_speech_sensitivity,
358-
// startOfSpeechSensitivity: this.automaticActivityDetection.start_of_speech_sensitivity,
359-
// },
360-
// activityHandling: this.activityHandling,
361-
// },
344+
345+
346+
realtimeInputConfig: {
347+
automaticActivityDetection: {
348+
disabled: this.automaticActivityDetection.disabled,
349+
silenceDurationMs: this.automaticActivityDetection.silence_duration_ms,
350+
prefixPaddingMs: this.automaticActivityDetection.prefix_padding_ms,
351+
endOfSpeechSensitivity: this.automaticActivityDetection.end_of_speech_sensitivity,
352+
startOfSpeechSensitivity: this.automaticActivityDetection.start_of_speech_sensitivity,
353+
},
354+
activityHandling: this.activityHandling,
355+
turnCoverage: "TURN_INCLUDES_ONLY_ACTIVITY",
356+
},
362357
},
363358
};
364359

@@ -378,10 +373,7 @@ class GeminiLiveAPI {
378373
sessionSetupMessage.setup.tools = [{ googleSearch: {} }];
379374
}
380375

381-
// Add affective dialog if enabled
382-
// if (this.enableAffectiveDialog) {
383-
// sessionSetupMessage.setup.generationConfig.enableAffectiveDialog = true;
384-
// }
376+
385377

386378
// Store the setup message for later access
387379
this.lastSetupMessage = sessionSetupMessage;

gemini-live-ephemeral-tokens-websocket/frontend/index.html

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ <h2>API Configuration</h2>
8585

8686
<div>
8787
<label for="model">Model ID:</label><br />
88-
<input type="text" id="model" value="gemini-3.1-flash-audio-eap" placeholder="Enter model ID" />
88+
<input type="text" id="model" value="gemini-3.1-flash-live-preview" placeholder="Enter model ID" />
8989
</div>
9090
</details>
9191

@@ -118,24 +118,12 @@ <h2>API Configuration</h2>
118118
Lower = more predictable/focused</small>
119119
</div>
120120

121-
<div>
122-
<input type="checkbox" id="enableProactiveAudio" checked />
123-
<label for="enableProactiveAudio">Enable proactive audio (Gemini will ignore speech based on
124-
instructions)</label>
125-
</div>
126-
127121
<div>
128122
<input type="checkbox" id="enableGrounding" />
129123
<label for="enableGrounding">Enable Google grounding (Enabling Google grounding will disable
130124
custom tools)
131125
</label>
132126
</div>
133-
134-
<div>
135-
<input type="checkbox" id="enableAffectiveDialog" checked />
136-
<label for="enableAffectiveDialog">Enable affective dialog (emotion detection and empathetic
137-
responses)</label>
138-
</div>
139127
</details>
140128

141129
<!-- Custom Tools -->

gemini-live-genai-python-sdk/gemini_live.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ async def start_session(self, audio_input_queue, video_input_queue, text_input_q
4242
system_instruction=types.Content(parts=[types.Part(text="You are a helpful AI assistant. Keep your responses concise. Speak in a friendly Irish accent. You can see the user's camera or screen which is shared as realtime input images with you.")]),
4343
input_audio_transcription=types.AudioTranscriptionConfig(),
4444
output_audio_transcription=types.AudioTranscriptionConfig(),
45-
turn_coverage="TURN_INCLUDES_ONLY_ACTIVITY",
46-
45+
realtime_input_config=types.RealtimeInputConfig(
46+
turn_coverage="TURN_INCLUDES_ONLY_ACTIVITY",
47+
),
4748
tools=self.tools,
4849
)
4950

0 commit comments

Comments
 (0)