Add input/output transcription to live audio example

makelinux · claude · makelinux · commit 6683121b5f34 · 2026-04-17T00:57:56.000+03:00
Enable text transcription alongside audio streaming.
Output transcription prints inline, input transcription
in italic. Newlines separate input from output and after
sentence-ending punctuation.

Co-Authored-By: Claude Opus 4.6 &lt;noreply@anthropic.com&gt;
Signed-off-by: Costa Shulyupin &lt;costa.shul@redhat.com&gt;
diff --git a/command-line/python/main.py b/command-line/python/main.py
@@ -18,6 +18,8 @@
 CONFIG = {
     "response_modalities": ["AUDIO"],
     "system_instruction": "You are a helpful and friendly AI assistant.",
+    "output_audio_transcription": {},
+    "input_audio_transcription": {},
 }
 
 audio_queue_output = asyncio.Queue()
@@ -50,13 +52,33 @@ async def send_realtime(session):
 
 async def receive_audio(session):
     """Receives responses from GenAI and puts audio data into the speaker audio queue."""
+    last_was_input = False
     while True:
         turn = session.receive()
         async for response in turn:
-            if (response.server_content and response.server_content.model_turn):
-                for part in response.server_content.model_turn.parts:
+            sc = response.server_content
+            if not sc:
+                continue
+            if sc.model_turn:
+                for part in sc.model_turn.parts:
                     if part.inline_data and isinstance(part.inline_data.data, bytes):
                         audio_queue_output.put_nowait(part.inline_data.data)
+            if sc.output_transcription:
+                if last_was_input:
+                    print()
+                    last_was_input = False
+                t = sc.output_transcription.text
+                print(t, end="", flush=True)
+                if t.rstrip()[-1:] in '.!?':
+                    print()
+            if sc.input_transcription:
+                if not last_was_input:
+                    print()
+                    last_was_input = True
+                t = sc.input_transcription.text
+                print(f"\033[3m{t}\033[0m", end="", flush=True)
+                if t.rstrip()[-1:] in '.!?':
+                    print()
 
         # Empty the queue on interruption to stop playback
         while not audio_queue_output.empty():