Skip to content

Commit ad5fee2

Browse files
committed
chore: gemini 3 changes.
1 parent 239adea commit ad5fee2

3 files changed

Lines changed: 68 additions & 36 deletions

File tree

gemini-live-ephemeral-tokens-websocket/frontend/geminilive.js

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -323,18 +323,18 @@ class GeminiLiveAPI {
323323
},
324324
systemInstruction: { parts: [{ text: this.systemInstructions }] },
325325
tools: { functionDeclarations: tools },
326-
proactivity: this.proactivity,
327-
328-
realtimeInputConfig: {
329-
automaticActivityDetection: {
330-
disabled: this.automaticActivityDetection.disabled,
331-
silenceDurationMs: this.automaticActivityDetection.silence_duration_ms,
332-
prefixPaddingMs: this.automaticActivityDetection.prefix_padding_ms,
333-
endOfSpeechSensitivity: this.automaticActivityDetection.end_of_speech_sensitivity,
334-
startOfSpeechSensitivity: this.automaticActivityDetection.start_of_speech_sensitivity,
335-
},
336-
activityHandling: this.activityHandling,
337-
},
326+
// proactivity: this.proactivity,
327+
328+
// realtimeInputConfig: {
329+
// automaticActivityDetection: {
330+
// disabled: this.automaticActivityDetection.disabled,
331+
// silenceDurationMs: this.automaticActivityDetection.silence_duration_ms,
332+
// prefixPaddingMs: this.automaticActivityDetection.prefix_padding_ms,
333+
// endOfSpeechSensitivity: this.automaticActivityDetection.end_of_speech_sensitivity,
334+
// startOfSpeechSensitivity: this.automaticActivityDetection.start_of_speech_sensitivity,
335+
// },
336+
// activityHandling: this.activityHandling,
337+
// },
338338
},
339339
};
340340

@@ -356,9 +356,9 @@ class GeminiLiveAPI {
356356
}
357357

358358
// Add affective dialog if enabled
359-
if (this.enableAffectiveDialog) {
360-
sessionSetupMessage.setup.generationConfig.enableAffectiveDialog = true;
361-
}
359+
// if (this.enableAffectiveDialog) {
360+
// sessionSetupMessage.setup.generationConfig.enableAffectiveDialog = true;
361+
// }
362362

363363
// Store the setup message for later access
364364
this.lastSetupMessage = sessionSetupMessage;
@@ -394,16 +394,15 @@ class GeminiLiveAPI {
394394
}
395395

396396
sendRealtimeInputMessage(data, mimeType) {
397-
const message = {
398-
realtimeInput: {
399-
mediaChunks: [
400-
{
401-
mimeType: mimeType,
402-
data: data,
403-
},
404-
],
405-
},
406-
};
397+
const blob = { mimeType, data };
398+
const message = { realtimeInput: {} };
399+
400+
if (mimeType.startsWith("audio/")) {
401+
message.realtimeInput.audio = blob;
402+
} else if (mimeType.startsWith("image/") || mimeType.startsWith("video/")) {
403+
message.realtimeInput.video = blob;
404+
}
405+
407406
this.sendMessage(message);
408407
this.addToBytesSent(data);
409408
}

gemini-live-genai-python-sdk/gemini_live.py

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import asyncio
22
import inspect
33
import logging
4+
import traceback
45

56
logger = logging.getLogger(__name__)
67
from google import genai
@@ -24,7 +25,7 @@ def __init__(self, api_key, model, input_sample_rate, tools=None, tool_mapping=N
2425
self.api_key = api_key
2526
self.model = model
2627
self.input_sample_rate = input_sample_rate
27-
self.client = genai.Client(api_key=api_key, http_options={"api_version": "v1alpha"})
28+
self.client = genai.Client(api_key=api_key)
2829
self.tools = tools or []
2930
self.tool_mapping = tool_mapping or {}
3031

@@ -41,12 +42,15 @@ async def start_session(self, audio_input_queue, video_input_queue, text_input_q
4142
system_instruction=types.Content(parts=[types.Part(text="You are a helpful AI assistant. Keep your responses concise. Speak in a friendly Irish accent. You can see the user's camera or screen which is shared as realtime input images with you.")]),
4243
input_audio_transcription=types.AudioTranscriptionConfig(),
4344
output_audio_transcription=types.AudioTranscriptionConfig(),
44-
proactivity=types.ProactivityConfig(proactive_audio=True),
45-
enable_affective_dialog=True,
45+
#proactivity=types.ProactivityConfig(proactive_audio=True),
46+
#enable_affective_dialog=True,
4647
tools=self.tools,
4748
)
4849

49-
async with self.client.aio.live.connect(model=self.model, config=config) as session:
50+
logger.info(f"Connecting to Gemini Live with model={self.model}")
51+
try:
52+
async with self.client.aio.live.connect(model=self.model, config=config) as session:
53+
logger.info("Gemini Live session opened successfully")
5054

5155
async def send_audio():
5256
try:
@@ -56,18 +60,22 @@ async def send_audio():
5660
audio=types.Blob(data=chunk, mime_type=f"audio/pcm;rate={self.input_sample_rate}")
5761
)
5862
except asyncio.CancelledError:
59-
pass
63+
logger.debug("send_audio task cancelled")
64+
except Exception as e:
65+
logger.error(f"send_audio error: {e}\n{traceback.format_exc()}")
6066

6167
async def send_video():
6268
try:
6369
while True:
6470
chunk = await video_input_queue.get()
6571
logger.info(f"Sending video frame to Gemini: {len(chunk)} bytes")
6672
await session.send_realtime_input(
67-
media=types.Blob(data=chunk, mime_type="image/jpeg")
73+
video=types.Blob(data=chunk, mime_type="image/jpeg")
6874
)
6975
except asyncio.CancelledError:
70-
pass
76+
logger.debug("send_video task cancelled")
77+
except Exception as e:
78+
logger.error(f"send_video error: {e}\n{traceback.format_exc()}")
7179

7280
async def send_text():
7381
try:
@@ -76,7 +84,9 @@ async def send_text():
7684
logger.info(f"Sending text to Gemini: {text}")
7785
await session.send_realtime_input(text=text)
7886
except asyncio.CancelledError:
79-
pass
87+
logger.debug("send_text task cancelled")
88+
except Exception as e:
89+
logger.error(f"send_text error: {e}\n{traceback.format_exc()}")
8090

8191
event_queue = asyncio.Queue()
8292

@@ -85,6 +95,13 @@ async def receive_loop():
8595
while True:
8696
async for response in session.receive():
8797
logger.debug(f"Received response from Gemini: {response}")
98+
99+
# Log GoAway and session-resumption-update messages for debugging
100+
if response.go_away:
101+
logger.warning(f"Received GoAway from Gemini: {response.go_away}")
102+
if response.session_resumption_update:
103+
logger.info(f"Session resumption update: {response.session_resumption_update}")
104+
88105
server_content = response.server_content
89106
tool_call = response.tool_call
90107

@@ -139,10 +156,17 @@ async def receive_loop():
139156
await event_queue.put({"type": "tool_call", "name": func_name, "args": args, "result": result})
140157

141158
await session.send_tool_response(function_responses=function_responses)
159+
160+
# session.receive() iterator ended (e.g. after turn_complete) — re-enter to keep listening
161+
logger.debug("Gemini receive iterator completed, re-entering receive loop")
142162

163+
except asyncio.CancelledError:
164+
logger.debug("receive_loop task cancelled")
143165
except Exception as e:
144-
await event_queue.put({"type": "error", "error": str(e)})
166+
logger.error(f"receive_loop error: {type(e).__name__}: {e}\n{traceback.format_exc()}")
167+
await event_queue.put({"type": "error", "error": f"{type(e).__name__}: {e}"})
145168
finally:
169+
logger.info("receive_loop exiting")
146170
await event_queue.put(None)
147171

148172
send_audio_task = asyncio.create_task(send_audio())
@@ -161,7 +185,13 @@ async def receive_loop():
161185
break
162186
yield event
163187
finally:
188+
logger.info("Cleaning up Gemini Live session tasks")
164189
send_audio_task.cancel()
165190
send_video_task.cancel()
166191
send_text_task.cancel()
167192
receive_task.cancel()
193+
except Exception as e:
194+
logger.error(f"Gemini Live session error: {type(e).__name__}: {e}\n{traceback.format_exc()}")
195+
raise
196+
finally:
197+
logger.info("Gemini Live session closed")

gemini-live-genai-python-sdk/main.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,10 @@
1414
# Load environment variables
1515
load_dotenv()
1616

17-
# Configure logging
17+
# Configure logging - DEBUG for our modules, INFO for everything else
1818
logging.basicConfig(level=logging.INFO)
19+
logging.getLogger("gemini_live").setLevel(logging.DEBUG)
20+
logging.getLogger(__name__).setLevel(logging.DEBUG)
1921
logger = logging.getLogger(__name__)
2022

2123
# Configuration
@@ -106,7 +108,8 @@ async def run_session():
106108
try:
107109
await run_session()
108110
except Exception as e:
109-
logger.error(f"Error in Gemini session: {e}")
111+
import traceback
112+
logger.error(f"Error in Gemini session: {type(e).__name__}: {e}\n{traceback.format_exc()}")
110113
finally:
111114
receive_task.cancel()
112115
# Ensure websocket is closed if not already

0 commit comments

Comments
 (0)